Total coverage: 205415 (12%)of 1821156
10 1 1 7 10 10 2 10 10 10 7 5 4 10 2 1 1 1 10 10 11 1 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 // SPDX-License-Identifier: GPL-2.0-or-later /* * spca1528 subdriver * * Copyright (C) 2010-2011 Jean-Francois Moine (http://moinejf.free.fr) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca1528" #include "gspca.h" #include "jpeg.h" MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>"); MODULE_DESCRIPTION("SPCA1528 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 pkt_seq; u8 jpeg_hdr[JPEG_HDR_SZ]; }; static const struct v4l2_pix_format vga_mode[] = { /* (does not work correctly) {176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 5 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 3}, */ {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 4 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 2}, {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, }; /* read <len> bytes to gspca usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, u8 req, u16 index, int len) { #if USB_BUF_SZ < 64 #error "USB buffer too small" #endif struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, /* value */ index, gspca_dev->usb_buf, len, 500); gspca_dbg(gspca_dev, D_USBI, "GET %02x 0000 %04x %02x\n", req, index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; /* * Make sure the buffer is zeroed to avoid uninitialized * values. */ memset(gspca_dev->usb_buf, 0, USB_BUF_SZ); } } static void reg_w(struct gspca_dev *gspca_dev, u8 req, u16 value, u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "SET %02x %04x %04x\n", req, value, index); ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); if (ret < 0) { pr_err("reg_w err %d\n", ret); gspca_dev->usb_err = ret; } } static void reg_wb(struct gspca_dev *gspca_dev, u8 req, u16 value, u16 index, u8 byte) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "SET %02x %04x %04x %02x\n", req, value, index, byte); gspca_dev->usb_buf[0] = byte; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, gspca_dev->usb_buf, 1, 500); if (ret < 0) { pr_err("reg_w err %d\n", ret); gspca_dev->usb_err = ret; } } static void wait_status_0(struct gspca_dev *gspca_dev) { int i, w; i = 16; w = 0; do { reg_r(gspca_dev, 0x21, 0x0000, 1); if (gspca_dev->usb_buf[0] == 0) return; w += 15; msleep(w); } while (--i > 0); gspca_err(gspca_dev, "wait_status_0 timeout\n"); gspca_dev->usb_err = -ETIME; } static void wait_status_1(struct gspca_dev *gspca_dev) { int i; i = 10; do { reg_r(gspca_dev, 0x21, 0x0001, 1); msleep(10); if (gspca_dev->usb_buf[0] == 1) { reg_wb(gspca_dev, 0x21, 0x0000, 0x0001, 0x00); reg_r(gspca_dev, 0x21, 0x0001, 1); return; } } while (--i > 0); gspca_err(gspca_dev, "wait_status_1 timeout\n"); gspca_dev->usb_err = -ETIME; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc0, 0x0000, 0x00c0, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc1, 0x0000, 0x00c1, val); } static void sethue(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc2, 0x0000, 0x0000, val); } static void setcolor(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc3, 0x0000, 0x00c3, val); } static void setsharpness(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc4, 0x0000, 0x00c4, val); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { gspca_dev->cam.cam_mode = vga_mode; gspca_dev->cam.nmodes = ARRAY_SIZE(vga_mode); gspca_dev->cam.npkt = 128; /* number of packets per ISOC message */ /*fixme: 256 in ms-win traces*/ return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { reg_w(gspca_dev, 0x00, 0x0001, 0x2067); reg_w(gspca_dev, 0x00, 0x00d0, 0x206b); reg_w(gspca_dev, 0x00, 0x0000, 0x206c); reg_w(gspca_dev, 0x00, 0x0001, 0x2069); msleep(8); reg_w(gspca_dev, 0x00, 0x00c0, 0x206b); reg_w(gspca_dev, 0x00, 0x0000, 0x206c); reg_w(gspca_dev, 0x00, 0x0001, 0x2069); reg_r(gspca_dev, 0x20, 0x0000, 1); reg_r(gspca_dev, 0x20, 0x0000, 5); reg_r(gspca_dev, 0x23, 0x0000, 64); gspca_dbg(gspca_dev, D_PROBE, "%s%s\n", &gspca_dev->usb_buf[0x1c], &gspca_dev->usb_buf[0x30]); reg_r(gspca_dev, 0x23, 0x0001, 64); return gspca_dev->usb_err; } /* function called at start time before URB creation */ static int sd_isoc_init(struct gspca_dev *gspca_dev) { u8 mode; reg_r(gspca_dev, 0x00, 0x2520, 1); wait_status_0(gspca_dev); reg_w(gspca_dev, 0xc5, 0x0003, 0x0000); wait_status_1(gspca_dev); wait_status_0(gspca_dev); mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_wb(gspca_dev, 0x25, 0x0000, 0x0004, mode); reg_r(gspca_dev, 0x25, 0x0004, 1); reg_wb(gspca_dev, 0x27, 0x0000, 0x0000, 0x06); /* 420 */ reg_r(gspca_dev, 0x27, 0x0000, 1); /* not useful.. gspca_dev->alt = 4; * use alternate setting 3 */ return gspca_dev->usb_err; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* initialize the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x22); /* JPEG 411 */ /* the JPEG quality shall be 85% */ jpeg_set_qual(sd->jpeg_hdr, 85); reg_r(gspca_dev, 0x00, 0x2520, 1); msleep(8); /* start the capture */ wait_status_0(gspca_dev); reg_w(gspca_dev, 0x31, 0x0000, 0x0004); /* start request */ wait_status_1(gspca_dev); wait_status_0(gspca_dev); msleep(200); sd->pkt_seq = 0; return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* stop the capture */ wait_status_0(gspca_dev); reg_w(gspca_dev, 0x31, 0x0000, 0x0000); /* stop request */ wait_status_1(gspca_dev); wait_status_0(gspca_dev); } /* move a packet adding 0x00 after 0xff */ static void add_packet(struct gspca_dev *gspca_dev, u8 *data, int len) { int i; i = 0; do { if (data[i] == 0xff) { gspca_frame_add(gspca_dev, INTER_PACKET, data, i + 1); len -= i; data += i; *data = 0x00; i = 0; } } while (++i < len); gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; static const u8 ffd9[] = {0xff, 0xd9}; /* image packets start with: * 02 8n * with <n> bit: * 0x01: even (0) / odd (1) image * 0x02: end of image when set */ if (len < 3) return; /* empty packet */ if (*data == 0x02) { if (data[1] & 0x02) { sd->pkt_seq = !(data[1] & 1); add_packet(gspca_dev, data + 2, len - 2); gspca_frame_add(gspca_dev, LAST_PACKET, ffd9, 2); return; } if ((data[1] & 1) != sd->pkt_seq) goto err; if (gspca_dev->last_packet_type == LAST_PACKET) gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); add_packet(gspca_dev, data + 2, len - 2); return; } err: gspca_dev->last_packet_type = DISCARD_PACKET; } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_HUE: sethue(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolor(gspca_dev, ctrl->val); break; case V4L2_CID_SHARPNESS: setsharpness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 8, 1, 1); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 0, 255, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 8, 1, 1); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SHARPNESS, 0, 255, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_isoc_init, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x04fc, 0x1528)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { /* the video interface for isochronous transfer is 1 */ if (intf->cur_altsetting->desc.bInterfaceNumber != 1) return -ENODEV; return gspca_dev_probe2(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
3 3 3 3 3 6 3 3 9 3 3 3 3 3 3 3 3 6 6 6 6 10 10 3 10 10 10 2 4 1 1 1 6 3 9 3 6 3 6 1 1 1 1 1 1 7 2 2 1 2 1 2 2 57 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 // SPDX-License-Identifier: GPL-2.0 /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the NetFilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Version 1, is capable of handling both version 0 and 1 messages. * Version 0 is the plain old format. * Note Version 0 receivers will just drop Ver 1 messages. * Version 1 is capable of handle IPv6, Persistence data, * time-outs, and firewall marks. * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 * * Definitions Message: is a complete datagram * Sync_conn: is a part of a Message * Param Data is an option to a Sync_conn. * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * ip_vs_sync: sync connection info from master load balancer to backups * through multicast * * Changes: * Alexandre Cassen : Added master & backup support at a time. * Alexandre Cassen : Added SyncID support for incoming sync * messages filtering. * Justin Ossevoort : Fix endian problem on sync message size. * Hans Schillstrom : Added Version 1: i.e. IPv6, * Persistence support, fwmark and time-out. */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/inetdevice.h> #include <linux/net.h> #include <linux/completion.h> #include <linux/delay.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/igmp.h> /* for ip_mc_join_group */ #include <linux/udp.h> #include <linux/err.h> #include <linux/kthread.h> #include <linux/wait.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/unaligned.h> /* Used for ntoh_seq and hton_seq */ #include <net/ip.h> #include <net/sock.h> #include <net/ip_vs.h> #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ #define IP_VS_SYNC_PORT 8848 /* multicast port */ #define SYNC_PROTO_VER 1 /* Protocol version in header */ static struct lock_class_key __ipvs_sync_key; /* * IPVS sync connection entry * Version 0, i.e. original version. */ struct ip_vs_sync_conn_v0 { __u8 reserved; /* Protocol, addresses and port numbers */ __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 cport; __be16 vport; __be16 dport; __be32 caddr; /* client address */ __be32 vaddr; /* virtual address */ __be32 daddr; /* destination address */ /* Flags and state transition */ __be16 flags; /* status flags */ __be16 state; /* state info */ /* The sequence options start here */ }; struct ip_vs_sync_conn_options { struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ }; /* Sync Connection format (sync_conn) 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type | Protocol | Ver. | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Flags | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | State | cport | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | vport | dport | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | fwmark | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | timeout (in sec.) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ... | | IP-Addresses (v4 or v6) | | ... | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Optional Parameters. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Param. Type | Param. Length | Param. data | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ... | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | Param Type | Param. Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Param data | | Last Param data should be padded for 32 bit alignment | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ /* * Type 0, IPv4 sync connection format */ struct ip_vs_sync_v4 { __u8 type; __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 ver_size; /* Version msb 4 bits */ /* Flags and state transition */ __be32 flags; /* status flags */ __be16 state; /* state info */ /* Protocol, addresses and port numbers */ __be16 cport; __be16 vport; __be16 dport; __be32 fwmark; /* Firewall mark from skb */ __be32 timeout; /* cp timeout */ __be32 caddr; /* client address */ __be32 vaddr; /* virtual address */ __be32 daddr; /* destination address */ /* The sequence options start here */ /* PE data padded to 32bit alignment after seq. options */ }; /* * Type 2 messages IPv6 */ struct ip_vs_sync_v6 { __u8 type; __u8 protocol; /* Which protocol (TCP/UDP) */ __be16 ver_size; /* Version msb 4 bits */ /* Flags and state transition */ __be32 flags; /* status flags */ __be16 state; /* state info */ /* Protocol, addresses and port numbers */ __be16 cport; __be16 vport; __be16 dport; __be32 fwmark; /* Firewall mark from skb */ __be32 timeout; /* cp timeout */ struct in6_addr caddr; /* client address */ struct in6_addr vaddr; /* virtual address */ struct in6_addr daddr; /* destination address */ /* The sequence options start here */ /* PE data padded to 32bit alignment after seq. options */ }; union ip_vs_sync_conn { struct ip_vs_sync_v4 v4; struct ip_vs_sync_v6 v6; }; /* Bits in Type field in above */ #define STYPE_INET6 0 #define STYPE_F_INET6 (1 << STYPE_INET6) #define SVER_SHIFT 12 /* Shift to get version */ #define SVER_MASK 0x0fff /* Mask to strip version */ #define IPVS_OPT_SEQ_DATA 1 #define IPVS_OPT_PE_DATA 2 #define IPVS_OPT_PE_NAME 3 #define IPVS_OPT_PARAM 7 #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { struct task_struct *task; struct netns_ipvs *ipvs; struct socket *sock; char *buf; int id; }; /* Version 0 definition of packet sizes */ #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) #define FULL_CONN_SIZE \ (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) /* The master mulitcasts messages (Datagrams) to the backup load balancers in the following format. Version 1: Note, first byte should be Zero, so ver 0 receivers will drop the packet. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 0 | SyncID | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Count Conns | Version | Reserved, set to Zero | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (1) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | . | ~ . ~ | . | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (n) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Version 0 Header 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Count Conns | SyncID | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | IPVS Sync Connection (1) | */ /* Version 0 header */ struct ip_vs_sync_mesg_v0 { __u8 nr_conns; __u8 syncid; __be16 size; /* ip_vs_sync_conn entries start here */ }; /* Version 1 header */ struct ip_vs_sync_mesg { __u8 reserved; /* must be zero */ __u8 syncid; __be16 size; __u8 nr_conns; __s8 version; /* SYNC_PROTO_VER */ __u16 spare; /* ip_vs_sync_conn entries start here */ }; union ipvs_sockaddr { struct sockaddr_in in; struct sockaddr_in6 in6; }; struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; /* pointers for the message data */ struct ip_vs_sync_mesg *mesg; unsigned char *head; unsigned char *end; }; /* * Copy of struct ip_vs_seq * From unaligned network order to aligned host order */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); } /* * Copy of struct ip_vs_seq * From Aligned host order to unaligned network order */ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) { put_unaligned_be32(ho->init_seq, &no->init_seq); put_unaligned_be32(ho->delta, &no->delta); put_unaligned_be32(ho->previous_delta, &no->previous_delta); } static inline struct ip_vs_sync_buff * sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_lock); if (list_empty(&ms->sync_queue)) { sb = NULL; __set_current_state(TASK_INTERRUPTIBLE); } else { sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); ms->sync_queue_len--; if (!ms->sync_queue_len) ms->sync_queue_delay = 0; } spin_unlock_bh(&ipvs->sync_lock); return sb; } /* * Create a new sync buffer for Version 1 proto. */ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg), ipvs->mcfg.sync_maxlen); sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; sb->mesg->syncid = ipvs->mcfg.syncid; sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); sb->end = (unsigned char *)sb->mesg + len; sb->firstuse = jiffies; return sb; } static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) { kfree(sb->mesg); kfree(sb); } static inline void sb_queue_tail(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb = ms->sync_buff; spin_lock(&ipvs->sync_lock); if (ipvs->sync_state & IP_VS_STATE_MASTER && ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { if (!ms->sync_queue_len) schedule_delayed_work(&ms->master_wakeup_work, max(IPVS_SYNC_SEND_DELAY, 1)); ms->sync_queue_len++; list_add_tail(&sb->list, &ms->sync_queue); if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) { int id = (int)(ms - ipvs->ms); wake_up_process(ipvs->master_tinfo[id].task); } } else ip_vs_sync_buff_release(sb); spin_unlock(&ipvs->sync_lock); } /* * Get the current sync buffer if it has been created for more * than the specified time or the specified time is zero. */ static inline struct ip_vs_sync_buff * get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, unsigned long time) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_buff_lock); sb = ms->sync_buff; if (sb && time_after_eq(jiffies - sb->firstuse, time)) { ms->sync_buff = NULL; __set_current_state(TASK_RUNNING); } else sb = NULL; spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } static inline int select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) { return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; } /* * Create a new sync buffer for Version 0 proto. */ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0), ipvs->mcfg.sync_maxlen); sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; mesg->syncid = ipvs->mcfg.syncid; mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); sb->end = (unsigned char *)mesg + len; sb->firstuse = jiffies; return sb; } /* Check if connection is controlled by persistence */ static inline bool in_persistence(struct ip_vs_conn *cp) { for (cp = cp->control; cp; cp = cp->control) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) return true; } return false; } /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. Additionally, retry * sync_retries times with period of sync_refresh_period/8 * - (2) if both sync_refresh_period and sync_period are 0 send sync only * for state changes or only once when pkts matches sync_threshold * - (3) templates: rate can be reduced only with sync_refresh_period or * with (2) */ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { unsigned long orig = READ_ONCE(cp->sync_endtime); unsigned long now = jiffies; unsigned long n = (now + cp->timeout) & ~3UL; unsigned int sync_refresh_period; int sync_period; int force; /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | (1 << IP_VS_TCP_S_FIN_WAIT) | (1 << IP_VS_TCP_S_CLOSE) | (1 << IP_VS_TCP_S_CLOSE_WAIT) | (1 << IP_VS_TCP_S_TIME_WAIT)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) goto set; } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) goto set; } else { /* UDP or another protocol with single state */ force = 0; } sync_refresh_period = sysctl_sync_refresh_period(ipvs); if (sync_refresh_period > 0) { long diff = n - orig; long min_diff = max(cp->timeout >> 1, 10UL * HZ); /* Avoid sync if difference is below sync_refresh_period * and below the half timeout. */ if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { int retries = orig & 3; if (retries >= sysctl_sync_retries(ipvs)) return 0; if (time_before(now, orig - cp->timeout + (sync_refresh_period >> 3))) return 0; n |= retries + 1; } } sync_period = sysctl_sync_period(ipvs); if (sync_period > 0) { if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && pkts % sync_period != sysctl_sync_threshold(ipvs)) return 0; } else if (!sync_refresh_period && pkts != sysctl_sync_threshold(ipvs)) return 0; set: cp->old_state = cp->state; n = cmpxchg(&cp->sync_endtime, orig, n); return n == orig || force; } /* * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; unsigned int len; if (unlikely(cp->af != AF_INET)) return; /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) return; spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { spin_unlock_bh(&ipvs->sync_buff_lock); return; } id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; buff = ms->sync_buff; len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; if (buff) { m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; /* Send buffer if it is for v1 */ if (buff->head + len > buff->end || !m->nr_conns) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; } } if (!buff) { buff = ip_vs_sync_buff_create_v0(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } ms->sync_buff = buff; } m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; s = (struct ip_vs_sync_conn_v0 *) buff->head; /* copy members */ s->reserved = 0; s->protocol = cp->protocol; s->cport = cp->cport; s->vport = cp->vport; s->dport = cp->dport; s->caddr = cp->caddr.ip; s->vaddr = cp->vaddr.ip; s->daddr = cp->daddr.ip; s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); s->state = htons(cp->state); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { struct ip_vs_sync_conn_options *opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(opt, &cp->sync_conn_opt, sizeof(*opt)); } m->nr_conns++; m->size = htons(ntohs(m->size) + len); buff->head += len; spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ cp = cp->control; if (cp) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) pkts = atomic_inc_return(&cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); ip_vs_sync_conn(ipvs, cp, pkts); } } /* * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. * Sending Version 1 messages */ void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; __u8 *p; unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { ip_vs_sync_conn_v0(ipvs, cp, pkts); return; } /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) goto control; sloop: if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) goto control; /* Sanity checks */ pe_name_len = 0; if (cp->pe_data_len) { if (!cp->pe_data || !cp->dest) { IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); return; } pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { spin_unlock_bh(&ipvs->sync_buff_lock); return; } id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) len = sizeof(struct ip_vs_sync_v6); else #endif len = sizeof(struct ip_vs_sync_v4); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) len += sizeof(struct ip_vs_sync_conn_options) + 2; if (cp->pe_data_len) len += cp->pe_data_len + 2; /* + Param hdr field */ if (pe_name_len) len += pe_name_len + 2; /* check if there is a space for this one */ pad = 0; buff = ms->sync_buff; if (buff) { m = buff->mesg; pad = (4 - (size_t) buff->head) & 3; /* Send buffer if it is for v0 */ if (buff->head + len + pad > buff->end || m->reserved) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; pad = 0; } } if (!buff) { buff = ip_vs_sync_buff_create(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } ms->sync_buff = buff; m = buff->mesg; } p = buff->head; buff->head += pad + len; m->size = htons(ntohs(m->size) + pad + len); /* Add ev. padding from prev. sync_conn */ while (pad--) *(p++) = 0; s = (union ip_vs_sync_conn *)p; /* Set message type & copy members */ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); s->v4.state = htons(cp->state); s->v4.protocol = cp->protocol; s->v4.cport = cp->cport; s->v4.vport = cp->vport; s->v4.dport = cp->dport; s->v4.fwmark = htonl(cp->fwmark); s->v4.timeout = htonl(cp->timeout / HZ); m->nr_conns++; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) { p += sizeof(struct ip_vs_sync_v6); s->v6.caddr = cp->caddr.in6; s->v6.vaddr = cp->vaddr.in6; s->v6.daddr = cp->daddr.in6; } else #endif { p += sizeof(struct ip_vs_sync_v4); /* options ptr */ s->v4.caddr = cp->caddr.ip; s->v4.vaddr = cp->vaddr.ip; s->v4.daddr = cp->daddr.ip; } if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { *(p++) = IPVS_OPT_SEQ_DATA; *(p++) = sizeof(struct ip_vs_sync_conn_options); hton_seq((struct ip_vs_seq *)p, &cp->in_seq); p += sizeof(struct ip_vs_seq); hton_seq((struct ip_vs_seq *)p, &cp->out_seq); p += sizeof(struct ip_vs_seq); } /* Handle pe data */ if (cp->pe_data_len && cp->pe_data) { *(p++) = IPVS_OPT_PE_DATA; *(p++) = cp->pe_data_len; memcpy(p, cp->pe_data, cp->pe_data_len); p += cp->pe_data_len; if (pe_name_len) { /* Add PE_NAME */ *(p++) = IPVS_OPT_PE_NAME; *(p++) = pe_name_len; memcpy(p, cp->pe->name, pe_name_len); p += pe_name_len; } } spin_unlock_bh(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ cp = cp->control; if (!cp) return; if (cp->flags & IP_VS_CONN_F_TEMPLATE) pkts = atomic_inc_return(&cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); goto sloop; } /* * fill_param used by version 1 */ static inline int ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, sc->v4.vport, p); /* Handle pe data */ if (pe_data_len) { if (pe_name_len) { char buff[IP_VS_PENAME_MAXLEN+1]; memcpy(buff, pe_name, pe_name_len); buff[pe_name_len]=0; p->pe = __ip_vs_pe_getbyname(buff); if (!p->pe) { IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); return 1; } } else { IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); return 1; } p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { module_put(p->pe->module); return -ENOMEM; } p->pe_data_len = pe_data_len; } return 0; } /* * Connection Add / Update. * Common for version 0 and 1 reception of backup sync_conns. * Param: ... * timeout is in sec. */ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param, unsigned int flags, unsigned int state, unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); if (cp && ((cp->dport != dport) || !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) { if (!(flags & IP_VS_CONN_F_INACTIVE)) { ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); cp = NULL; } else { /* This is the expiration message for the * connection that was already replaced, so we * just ignore it. */ __ip_vs_conn_put(cp); kfree(param->pe_data); return; } } } else { cp = ip_vs_ct_in_get(param); } if (cp) { /* Free pe_data */ kfree(param->pe_data); dest = cp->dest; spin_lock_bh(&cp->lock); if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { if (flags & IP_VS_CONN_F_INACTIVE) { atomic_dec(&dest->activeconns); atomic_inc(&dest->inactconns); } else { atomic_inc(&dest->activeconns); atomic_dec(&dest->inactconns); } } flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; spin_unlock_bh(&cp->lock); if (!dest) ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ rcu_read_lock(); /* This function is only invoked by the synchronization * code. We do not currently support heterogeneous pools * with synchronization, so we can make the assumption that * the svc_af is the same as the dest_af */ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest, fwmark); rcu_read_unlock(); if (!cp) { kfree(param->pe_data); IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } if (!(flags & IP_VS_CONN_F_TEMPLATE)) kfree(param->pe_data); } if (opt) { cp->in_seq = opt->in_seq; cp->out_seq = opt->out_seq; } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; /* * For Ver 0 messages style * - Not possible to recover the right timeout for templates * - can not find the right fwmark * virtual service. If needed, we can do it for * non-fwmark persistent services. * Ver 1 messages style. * - No problem. */ if (timeout) { if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; } else { struct ip_vs_proto_data *pd; pd = ip_vs_proto_data_get(ipvs, protocol); if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) cp->timeout = pd->timeout_table[state]; else cp->timeout = (3*60*HZ); } ip_vs_conn_put(cp); } /* * Process received multicast message for Version 0 */ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_conn_options *opt; struct ip_vs_protocol *pp; struct ip_vs_conn_param param; char *p; int i; p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); for (i=0; i<m->nr_conns; i++) { unsigned int flags, state; if (p + SIMPLE_CONN_SIZE > buffer+buflen) { IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); return; } s = (struct ip_vs_sync_conn_v0 *) p; flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; flags &= ~IP_VS_CONN_F_HASHED; if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; p += FULL_CONN_SIZE; if (p > buffer+buflen) { IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); return; } } else { opt = NULL; p += SIMPLE_CONN_SIZE; } state = ntohs(s->state); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { pp = ip_vs_proto_get(s->protocol); if (!pp) { IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", s->protocol); continue; } if (state >= pp->num_states) { IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", pp->name, state); continue; } } else { if (state >= IP_VS_CTPL_S_LAST) IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n", state); } ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, &param); /* Send timeout as Zero */ ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, 0, 0, opt); } } /* * Handle options */ static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, __u32 *opt_flags, struct ip_vs_sync_conn_options *opt) { struct ip_vs_sync_conn_options *topt; topt = (struct ip_vs_sync_conn_options *)p; if (plen != sizeof(struct ip_vs_sync_conn_options)) { IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); return -EINVAL; } if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { IP_VS_DBG(2, "BACKUP, conn options found twice\n"); return -EINVAL; } ntoh_seq(&topt->in_seq, &opt->in_seq); ntoh_seq(&topt->out_seq, &opt->out_seq); *opt_flags |= IPVS_OPT_F_SEQ_DATA; return 0; } static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, __u8 **data, unsigned int maxlen, __u32 *opt_flags, __u32 flag) { if (plen > maxlen) { IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); return -EINVAL; } if (*opt_flags & flag) { IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); return -EINVAL; } *data_len = plen; *data = p; *opt_flags |= flag; return 0; } /* * Process a Version 1 sync. connection */ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; struct ip_vs_protocol *pp; struct ip_vs_conn_param param; __u32 flags; unsigned int af, state, pe_data_len=0, pe_name_len=0; __u8 *pe_data=NULL, *pe_name=NULL; __u32 opt_flags=0; int retc=0; s = (union ip_vs_sync_conn *) p; if (s->v6.type & STYPE_F_INET6) { #ifdef CONFIG_IP_VS_IPV6 af = AF_INET6; p += sizeof(struct ip_vs_sync_v6); #else IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); retc = 10; goto out; #endif } else if (!s->v4.type) { af = AF_INET; p += sizeof(struct ip_vs_sync_v4); } else { return -10; } if (p > msg_end) return -20; /* Process optional params check Type & Len. */ while (p < msg_end) { int ptype; int plen; if (p+2 > msg_end) return -30; ptype = *(p++); plen = *(p++); if (!plen || ((p + plen) > msg_end)) return -40; /* Handle seq option p = param data */ switch (ptype & ~IPVS_OPT_F_PARAM) { case IPVS_OPT_SEQ_DATA: if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) return -50; break; case IPVS_OPT_PE_DATA: if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, IP_VS_PEDATA_MAXLEN, &opt_flags, IPVS_OPT_F_PE_DATA)) return -60; break; case IPVS_OPT_PE_NAME: if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, IP_VS_PENAME_MAXLEN, &opt_flags, IPVS_OPT_F_PE_NAME)) return -70; break; default: /* Param data mandatory ? */ if (!(ptype & IPVS_OPT_F_PARAM)) { IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", ptype & ~IPVS_OPT_F_PARAM); retc = 20; goto out; } } p += plen; /* Next option */ } /* Get flags and Mask off unsupported */ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; flags |= IP_VS_CONN_F_SYNC; state = ntohs(s->v4.state); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { pp = ip_vs_proto_get(s->v4.protocol); if (!pp) { IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", s->v4.protocol); retc = 30; goto out; } if (state >= pp->num_states) { IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", pp->name, state); retc = 40; goto out; } } else { if (state >= IP_VS_CTPL_S_LAST) IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n", state); } if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data, pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #ifdef CONFIG_IP_VS_IPV6 else ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #endif ip_vs_pe_put(param.pe); return 0; /* Error exit */ out: IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); return retc; } /* * Process received multicast message and create the corresponding * ip_vs_conn entries. * Handles Version 0 & 1 */ static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { IP_VS_DBG(2, "BACKUP, message header too short\n"); return; } if (buflen != ntohs(m2->size)) { IP_VS_DBG(2, "BACKUP, bogus message size\n"); return; } /* SyncID sanity check */ if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } /* Handle version 1 message */ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) && (m2->spare == 0)) { msg_end = buffer + sizeof(struct ip_vs_sync_mesg); nr_conns = m2->nr_conns; for (i=0; i<nr_conns; i++) { union ip_vs_sync_conn *s; unsigned int size; int retc; p = msg_end; if (p + sizeof(s->v4) > buffer+buflen) { IP_VS_ERR_RL("BACKUP, Dropping buffer, too small\n"); return; } s = (union ip_vs_sync_conn *)p; size = ntohs(s->v4.ver_size) & SVER_MASK; msg_end = p + size; /* Basic sanity checks */ if (msg_end > buffer+buflen) { IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); return; } if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", ntohs(s->v4.ver_size) >> SVER_SHIFT); return; } /* Process a single sync_conn */ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end); if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; } /* Make sure we have 32 bit alignment */ msg_end = p + ((size + 3) & ~3); } } else { /* Old type of message */ ip_vs_process_message_v0(ipvs, buffer, buflen); return; } } /* * Setup sndbuf (mode=1) or rcvbuf (mode=0) */ static void set_sock_size(struct sock *sk, int mode, int val) { /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ lock_sock(sk); if (mode) { val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, READ_ONCE(sysctl_wmem_max)); sk->sk_sndbuf = val * 2; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } else { val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, READ_ONCE(sysctl_rmem_max)); sk->sk_rcvbuf = val * 2; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); } /* * Setup loopback of outgoing multicasts on a sending socket */ static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ inet6_assign_bit(MC6_LOOP, sk, loop); } #endif } /* * Specify TTL for outgoing multicasts on a sending socket */ static void set_mcast_ttl(struct sock *sk, u_char ttl) { struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); WRITE_ONCE(inet->mc_ttl, ttl); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ WRITE_ONCE(np->mcast_hops, ttl); } #endif release_sock(sk); } /* Control fragmentation of messages */ static void set_mcast_pmtudisc(struct sock *sk, int val) { struct inet_sock *inet = inet_sk(sk); /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ lock_sock(sk); WRITE_ONCE(inet->pmtudisc, val); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); } /* * Specifiy default interface for outgoing multicasts */ static int set_mcast_if(struct sock *sk, struct net_device *dev) { struct inet_sock *inet = inet_sk(sk); if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; lock_sock(sk); inet->mc_index = dev->ifindex; /* inet->mc_addr = 0; */ #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_IF */ WRITE_ONCE(np->mcast_oif, dev->ifindex); } #endif release_sock(sk); return 0; } /* * Join a multicast group. * the group is specified by a class D multicast address 224.0.0.0/8 * in the in_addr structure passed in as a parameter. */ static int join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) { struct ip_mreqn mreq; int ret; memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; mreq.imr_ifindex = dev->ifindex; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); return ret; } #ifdef CONFIG_IP_VS_IPV6 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, struct net_device *dev) { int ret; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; lock_sock(sk); ret = ipv6_sock_mc_join(sk, dev->ifindex, addr); release_sock(sk); return ret; } #endif static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) { __be32 addr; struct sockaddr_in sin; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) pr_err("You probably need to specify IP address on " "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", dev->name, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr; sin.sin_port = 0; return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, struct ipvs_sync_daemon_cfg *c, int id) { if (AF_INET6 == c->mcast_af) { sa->in6 = (struct sockaddr_in6) { .sin6_family = AF_INET6, .sin6_port = htons(c->mcast_port + id), }; sa->in6.sin6_addr = c->mcast_group.in6; *salen = sizeof(sa->in6); } else { sa->in = (struct sockaddr_in) { .sin_family = AF_INET, .sin_port = htons(c->mcast_port + id), }; sa->in.sin_addr = c->mcast_group.in; *salen = sizeof(sa->in); } } /* * Set up sending multicast socket over UDP */ static int make_send_sock(struct netns_ipvs *ipvs, int id, struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); goto error; } *sock_ret = sock; result = set_mcast_if(sock->sk, dev); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; } set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl); /* Allow fragmentation if MTU changes */ set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT); result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 1, result); if (AF_INET == ipvs->mcfg.mcast_af) result = bind_mcastif_addr(sock, dev); else result = 0; if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; } return 0; error: return result; } /* * Set up receiving multicast socket over UDP */ static int make_receive_sock(struct netns_ipvs *ipvs, int id, struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); goto error; } *sock_ret = sock; /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ #ifdef CONFIG_IP_VS_IPV6 if (ipvs->bcfg.mcast_af == AF_INET6) result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, dev); else #endif result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, dev); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; } return 0; error: return result; } static int ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) { struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; struct kvec iov; int len; iov.iov_base = (void *)buffer; iov.iov_len = length; len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); return len; } static int ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) { int msize; int ret; msize = ntohs(msg->size); ret = ip_vs_send_async(sock, (char *)msg, msize); if (ret >= 0 || ret == -EAGAIN) return ret; pr_err("ip_vs_send_async error %d\n", ret); return 0; } static int ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) { struct msghdr msg = {NULL,}; struct kvec iov = {buffer, buflen}; int len; /* Receive a packet */ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, buflen); len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (len < 0) return len; return len; } /* Wakeup the master thread for sending */ static void master_wakeup_work_handler(struct work_struct *work) { struct ipvs_master_sync_state *ms = container_of(work, struct ipvs_master_sync_state, master_wakeup_work.work); struct netns_ipvs *ipvs = ms->ipvs; spin_lock_bh(&ipvs->sync_lock); if (ms->sync_queue_len && ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { int id = (int)(ms - ipvs->ms); ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; wake_up_process(ipvs->master_tinfo[id].task); } spin_unlock_bh(&ipvs->sync_lock); } /* Get next buffer to send */ static inline struct ip_vs_sync_buff * next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb; sb = sb_dequeue(ipvs, ms); if (sb) return sb; /* Do not delay entries in buffer for more than 2 seconds */ return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); } static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d, id = %d\n", ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id); for (;;) { sb = next_sync_buff(ipvs, ms); if (unlikely(kthread_should_stop())) break; if (!sb) { schedule_timeout(IPVS_SYNC_CHECK_PERIOD); continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { /* (Ab)use interruptible sleep to avoid increasing * the load avg. */ __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) goto done; } ip_vs_sync_buff_release(sb); } done: __set_current_state(TASK_RUNNING); if (sb) ip_vs_sync_buff_release(sb); /* clean up the sync_buff queue */ while ((sb = sb_dequeue(ipvs, ms))) ip_vs_sync_buff_release(sb); __set_current_state(TASK_RUNNING); /* clean up the current sync_buff */ sb = get_curr_sync_buff(ipvs, ms, 0); if (sb) ip_vs_sync_buff_release(sb); return 0; } static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; struct sock *sk = tinfo->sock->sk; struct udp_sock *up = udp_sk(sk); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d, id = %d\n", ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(sk), !skb_queue_empty_lockless(&sk->sk_receive_queue) || !skb_queue_empty_lockless(&up->reader_queue) || kthread_should_stop()); /* do we have data now? */ while (!skb_queue_empty_lockless(&sk->sk_receive_queue) || !skb_queue_empty_lockless(&up->reader_queue)) { len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->bcfg.sync_maxlen); if (len <= 0) { if (len != -EAGAIN) pr_err("receiving message error\n"); break; } ip_vs_process_message(ipvs, tinfo->buf, len); } } return 0; } int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { struct ip_vs_sync_thread_data *ti = NULL, *tinfo; struct task_struct *task; struct net_device *dev; char *name; int (*threadfn)(void *data); int id = 0, count, hlen; int result = -ENOMEM; u16 mtu, min_mtu; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); /* increase the module use count */ if (!ip_vs_use_count_inc()) return -ENOPROTOOPT; /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); if (mutex_trylock(&ipvs->sync_mutex)) break; rtnl_unlock(); mutex_lock(&ipvs->sync_mutex); if (rtnl_trylock()) break; mutex_unlock(&ipvs->sync_mutex); } if (!ipvs->sync_state) { count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); ipvs->threads_mask = count - 1; } else count = ipvs->threads_mask + 1; if (c->mcast_af == AF_UNSPEC) { c->mcast_af = AF_INET; c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP); } if (!c->mcast_port) c->mcast_port = IP_VS_SYNC_PORT; if (!c->mcast_ttl) c->mcast_ttl = 1; dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); result = -ENODEV; goto out_early; } hlen = (AF_INET6 == c->mcast_af) ? sizeof(struct ipv6hdr) + sizeof(struct udphdr) : sizeof(struct iphdr) + sizeof(struct udphdr); mtu = (state == IP_VS_STATE_BACKUP) ? clamp(dev->mtu, 1500U, 65535U) : 1500U; min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1; if (c->sync_maxlen) c->sync_maxlen = clamp_t(unsigned int, c->sync_maxlen, min_mtu, 65535 - hlen); else c->sync_maxlen = mtu - hlen; if (state == IP_VS_STATE_MASTER) { result = -EEXIST; if (ipvs->ms) goto out_early; ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { result = -EEXIST; if (ipvs->backup_tinfo) goto out_early; ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { result = -EINVAL; goto out_early; } if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; result = -ENOMEM; ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; ms = ipvs->ms; for (id = 0; id < count; id++, ms++) { INIT_LIST_HEAD(&ms->sync_queue); ms->sync_queue_len = 0; ms->sync_queue_delay = 0; INIT_DELAYED_WORK(&ms->master_wakeup_work, master_wakeup_work_handler); ms->ipvs = ipvs; } } result = -ENOMEM; ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); if (!ti) goto out; for (id = 0; id < count; id++) { tinfo = &ti[id]; tinfo->ipvs = ipvs; if (state == IP_VS_STATE_BACKUP) { result = -ENOMEM; tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) goto out; } tinfo->id = id; if (state == IP_VS_STATE_MASTER) result = make_send_sock(ipvs, id, dev, &tinfo->sock); else result = make_receive_sock(ipvs, id, dev, &tinfo->sock); if (result < 0) goto out; task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); if (IS_ERR(task)) { result = PTR_ERR(task); goto out; } tinfo->task = task; } /* mark as active */ if (state == IP_VS_STATE_MASTER) ipvs->master_tinfo = ti; else ipvs->backup_tinfo = ti; spin_lock_bh(&ipvs->sync_buff_lock); ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); return 0; out: /* We do not need RTNL lock anymore, release it here so that * sock_release below can use rtnl_lock to leave the mcast group. */ rtnl_unlock(); id = min(id, count - 1); if (ti) { for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->task) kthread_stop(tinfo->task); } } if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } mutex_unlock(&ipvs->sync_mutex); /* No more mutexes, release socks */ if (ti) { for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->sock) sock_release(tinfo->sock); kfree(tinfo->buf); } kfree(ti); } /* decrease the module use count */ ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); /* decrease the module use count */ ip_vs_use_count_dec(); return result; } int stop_sync_thread(struct netns_ipvs *ipvs, int state) { struct ip_vs_sync_thread_data *ti, *tinfo; int id; int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); mutex_lock(&ipvs->sync_mutex); if (state == IP_VS_STATE_MASTER) { retc = -ESRCH; if (!ipvs->ms) goto err; ti = ipvs->master_tinfo; /* * The lock synchronizes with sb_queue_tail(), so that we don't * add sync buffers to the queue, when we are already in * progress of stopping the master sync daemon. */ spin_lock_bh(&ipvs->sync_buff_lock); spin_lock(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock(&ipvs->sync_lock); spin_unlock_bh(&ipvs->sync_buff_lock); retc = 0; for (id = ipvs->threads_mask; id >= 0; id--) { struct ipvs_master_sync_state *ms = &ipvs->ms[id]; int ret; tinfo = &ti[id]; pr_info("stopping master sync thread %d ...\n", task_pid_nr(tinfo->task)); cancel_delayed_work_sync(&ms->master_wakeup_work); ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } kfree(ipvs->ms); ipvs->ms = NULL; ipvs->master_tinfo = NULL; } else if (state == IP_VS_STATE_BACKUP) { retc = -ESRCH; if (!ipvs->backup_tinfo) goto err; ti = ipvs->backup_tinfo; ipvs->sync_state &= ~IP_VS_STATE_BACKUP; retc = 0; for (id = ipvs->threads_mask; id >= 0; id--) { int ret; tinfo = &ti[id]; pr_info("stopping backup sync thread %d ...\n", task_pid_nr(tinfo->task)); ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } ipvs->backup_tinfo = NULL; } else { goto err; } id = ipvs->threads_mask; mutex_unlock(&ipvs->sync_mutex); /* No more mutexes, release socks */ for (tinfo = ti + id; tinfo >= ti; tinfo--) { if (tinfo->sock) sock_release(tinfo->sock); kfree(tinfo->buf); } kfree(ti); /* decrease the module use count */ ip_vs_use_count_dec(); return retc; err: mutex_unlock(&ipvs->sync_mutex); return retc; } /* * Initialize data struct for each netns */ int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs) { __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); return 0; } void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); }
1 2 3 2 3 3 3 2 2 2 2 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 // SPDX-License-Identifier: GPL-2.0 /* * Fintek F81232 USB to serial adaptor driver * Fintek F81532A/534A/535/536 USB to 2/4/8/12 serial adaptor driver * * Copyright (C) 2012 Greg Kroah-Hartman (gregkh@linuxfoundation.org) * Copyright (C) 2012 Linux Foundation */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial_reg.h> #define F81232_ID \ { USB_DEVICE(0x1934, 0x0706) } /* 1 port UART device */ #define F81534A_SERIES_ID \ { USB_DEVICE(0x2c42, 0x1602) }, /* In-Box 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1604) }, /* In-Box 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1605) }, /* In-Box 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1606) }, /* In-Box 12 port UART device */ \ { USB_DEVICE(0x2c42, 0x1608) }, /* Non-Flash type */ \ { USB_DEVICE(0x2c42, 0x1632) }, /* 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1634) }, /* 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1635) }, /* 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1636) } /* 12 port UART device */ #define F81534A_CTRL_ID \ { USB_DEVICE(0x2c42, 0x16f8) } /* Global control device */ static const struct usb_device_id f81232_id_table[] = { F81232_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_id_table[] = { F81534A_SERIES_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_ctrl_id_table[] = { F81534A_CTRL_ID, { } /* Terminating entry */ }; static const struct usb_device_id combined_id_table[] = { F81232_ID, F81534A_SERIES_ID, F81534A_CTRL_ID, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, combined_id_table); /* Maximum baudrate for F81232 */ #define F81232_MAX_BAUDRATE 1500000 #define F81232_DEF_BAUDRATE 9600 /* USB Control EP parameter */ #define F81232_REGISTER_REQUEST 0xa0 #define F81232_GET_REGISTER 0xc0 #define F81232_SET_REGISTER 0x40 #define F81534A_ACCESS_REG_RETRY 2 #define SERIAL_BASE_ADDRESS 0x0120 #define RECEIVE_BUFFER_REGISTER (0x00 + SERIAL_BASE_ADDRESS) #define INTERRUPT_ENABLE_REGISTER (0x01 + SERIAL_BASE_ADDRESS) #define FIFO_CONTROL_REGISTER (0x02 + SERIAL_BASE_ADDRESS) #define LINE_CONTROL_REGISTER (0x03 + SERIAL_BASE_ADDRESS) #define MODEM_CONTROL_REGISTER (0x04 + SERIAL_BASE_ADDRESS) #define LINE_STATUS_REGISTER (0x05 + SERIAL_BASE_ADDRESS) #define MODEM_STATUS_REGISTER (0x06 + SERIAL_BASE_ADDRESS) /* * F81232 Clock registers (106h) * * Bit1-0: Clock source selector * 00: 1.846MHz. * 01: 18.46MHz. * 10: 24MHz. * 11: 14.77MHz. */ #define F81232_CLK_REGISTER 0x106 #define F81232_CLK_1_846_MHZ 0 #define F81232_CLK_18_46_MHZ BIT(0) #define F81232_CLK_24_MHZ BIT(1) #define F81232_CLK_14_77_MHZ (BIT(1) | BIT(0)) #define F81232_CLK_MASK GENMASK(1, 0) #define F81534A_MODE_REG 0x107 #define F81534A_TRIGGER_MASK GENMASK(3, 2) #define F81534A_TRIGGER_MULTIPLE_4X BIT(3) #define F81534A_FIFO_128BYTE (BIT(1) | BIT(0)) /* Serial port self GPIO control, 2bytes [control&output data][input data] */ #define F81534A_GPIO_REG 0x10e #define F81534A_GPIO_MODE2_DIR BIT(6) /* 1: input, 0: output */ #define F81534A_GPIO_MODE1_DIR BIT(5) #define F81534A_GPIO_MODE0_DIR BIT(4) #define F81534A_GPIO_MODE2_OUTPUT BIT(2) #define F81534A_GPIO_MODE1_OUTPUT BIT(1) #define F81534A_GPIO_MODE0_OUTPUT BIT(0) #define F81534A_CTRL_CMD_ENABLE_PORT 0x116 struct f81232_private { struct mutex lock; u8 modem_control; u8 modem_status; u8 shadow_lcr; speed_t baud_base; struct work_struct lsr_work; struct work_struct interrupt_work; struct usb_serial_port *port; }; static u32 const baudrate_table[] = { 115200, 921600, 1152000, 1500000 }; static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, F81232_CLK_18_46_MHZ, F81232_CLK_24_MHZ }; static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) { return DIV_ROUND_CLOSEST(clockrate, baudrate); } static int f81232_get_register(struct usb_serial_port *port, u16 reg, u8 *val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_recv(dev, 0, F81232_REGISTER_REQUEST, F81232_GET_REGISTER, reg, 0, val, sizeof(*val), USB_CTRL_GET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_register(struct usb_serial_port *port, u16 reg, u8 val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, &val, sizeof(val), USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_mask_register(struct usb_serial_port *port, u16 reg, u8 mask, u8 val) { int status; u8 tmp; status = f81232_get_register(port, reg, &tmp); if (status) return status; tmp = (tmp & ~mask) | (val & mask); return f81232_set_register(port, reg, tmp); } static void f81232_read_msr(struct usb_serial_port *port) { int status; u8 current_msr; struct tty_struct *tty; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); status = f81232_get_register(port, MODEM_STATUS_REGISTER, &current_msr); if (status) { dev_err(&port->dev, "%s fail, status: %d\n", __func__, status); mutex_unlock(&priv->lock); return; } if (!(current_msr & UART_MSR_ANY_DELTA)) { mutex_unlock(&priv->lock); return; } priv->modem_status = current_msr; if (current_msr & UART_MSR_DCTS) port->icount.cts++; if (current_msr & UART_MSR_DDSR) port->icount.dsr++; if (current_msr & UART_MSR_TERI) port->icount.rng++; if (current_msr & UART_MSR_DDCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { usb_serial_handle_dcd_change(port, tty, current_msr & UART_MSR_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); mutex_unlock(&priv->lock); } static int f81232_set_mctrl(struct usb_serial_port *port, unsigned int set, unsigned int clear) { u8 val; int status; struct f81232_private *priv = usb_get_serial_port_data(port); if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) return 0; /* no change */ /* 'set' takes precedence over 'clear' */ clear &= ~set; /* force enable interrupt with OUT2 */ mutex_lock(&priv->lock); val = UART_MCR_OUT2 | priv->modem_control; if (clear & TIOCM_DTR) val &= ~UART_MCR_DTR; if (clear & TIOCM_RTS) val &= ~UART_MCR_RTS; if (set & TIOCM_DTR) val |= UART_MCR_DTR; if (set & TIOCM_RTS) val |= UART_MCR_RTS; dev_dbg(&port->dev, "%s new:%02x old:%02x\n", __func__, val, priv->modem_control); status = f81232_set_register(port, MODEM_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s set MCR status < 0\n", __func__); mutex_unlock(&priv->lock); return status; } priv->modem_control = val; mutex_unlock(&priv->lock); return 0; } static void f81232_update_line_status(struct usb_serial_port *port, unsigned char *data, size_t actual_length) { struct f81232_private *priv = usb_get_serial_port_data(port); if (!actual_length) return; switch (data[0] & 0x07) { case 0x00: /* msr change */ dev_dbg(&port->dev, "IIR: MSR Change: %02x\n", data[0]); schedule_work(&priv->interrupt_work); break; case 0x02: /* tx-empty */ break; case 0x04: /* rx data available */ break; case 0x06: /* lsr change */ /* we can forget it. the LSR will read from bulk-in */ dev_dbg(&port->dev, "IIR: LSR Change: %02x\n", data[0]); break; } } static void f81232_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int actual_length = urb->actual_length; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, urb->transfer_buffer); f81232_update_line_status(port, data, actual_length); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } static char f81232_handle_lsr(struct usb_serial_port *port, u8 lsr) { struct f81232_private *priv = usb_get_serial_port_data(port); char tty_flag = TTY_NORMAL; if (!(lsr & UART_LSR_BRK_ERROR_BITS)) return tty_flag; if (lsr & UART_LSR_BI) { tty_flag = TTY_BREAK; port->icount.brk++; usb_serial_handle_break(port); } else if (lsr & UART_LSR_PE) { tty_flag = TTY_PARITY; port->icount.parity++; } else if (lsr & UART_LSR_FE) { tty_flag = TTY_FRAME; port->icount.frame++; } if (lsr & UART_LSR_OE) { port->icount.overrun++; schedule_work(&priv->lsr_work); tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } return tty_flag; } static void f81232_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; /* * When opening the port we get a 1-byte packet with the current LSR, * which we discard. */ if ((urb->actual_length < 2) || (urb->actual_length % 2)) return; /* bulk-in data: [LSR(1Byte)+DATA(1Byte)][LSR(1Byte)+DATA(1Byte)]... */ for (i = 0; i < urb->actual_length; i += 2) { lsr = data[i]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { if (usb_serial_handle_sysrq_char(port, data[i + 1])) continue; } tty_insert_flip_char(&port->port, data[i + 1], tty_flag); } tty_flip_buffer_push(&port->port); } static void f81534a_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; u8 len; if (urb->actual_length < 3) { dev_err(&port->dev, "short message received: %d\n", urb->actual_length); return; } len = data[0]; if (len != urb->actual_length) { dev_err(&port->dev, "malformed message received: %d (%d)\n", urb->actual_length, len); return; } /* bulk-in data: [LEN][Data.....][LSR] */ lsr = data[len - 1]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { for (i = 1; i < len - 1; ++i) { if (!usb_serial_handle_sysrq_char(port, data[i])) { tty_insert_flip_char(&port->port, data[i], tty_flag); } } } else { tty_insert_flip_string_fixed_flag(&port->port, &data[1], tty_flag, len - 2); } tty_flip_buffer_push(&port->port); } static int f81232_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); int status; mutex_lock(&priv->lock); if (break_state) priv->shadow_lcr |= UART_LCR_SBC; else priv->shadow_lcr &= ~UART_LCR_SBC; status = f81232_set_register(port, LINE_CONTROL_REGISTER, priv->shadow_lcr); if (status) dev_err(&port->dev, "set break failed: %d\n", status); mutex_unlock(&priv->lock); return status; } static int f81232_find_clk(speed_t baudrate) { int idx; for (idx = 0; idx < ARRAY_SIZE(baudrate_table); ++idx) { if (baudrate <= baudrate_table[idx] && baudrate_table[idx] % baudrate == 0) return idx; } return -EINVAL; } static void f81232_set_baudrate(struct tty_struct *tty, struct usb_serial_port *port, speed_t baudrate, speed_t old_baudrate) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 lcr; int divisor; int status = 0; int i; int idx; speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { baudrate = baud_list[i]; if (baudrate == 0) { tty_encode_baud_rate(tty, 0, 0); return; } idx = f81232_find_clk(baudrate); if (idx >= 0) { tty_encode_baud_rate(tty, baudrate, baudrate); break; } } if (idx < 0) return; priv->baud_base = baudrate_table[idx]; divisor = calc_baud_divisor(baudrate, priv->baud_base); status = f81232_set_mask_register(port, F81232_CLK_REGISTER, F81232_CLK_MASK, clock_table[idx]); if (status) { dev_err(&port->dev, "%s failed to set CLK_REG: %d\n", __func__, status); return; } status = f81232_get_register(port, LINE_CONTROL_REGISTER, &lcr); /* get LCR */ if (status) { dev_err(&port->dev, "%s failed to get LCR: %d\n", __func__, status); return; } status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr | UART_LCR_DLAB); /* Enable DLAB */ if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); return; } status = f81232_set_register(port, RECEIVE_BUFFER_REGISTER, divisor & 0x00ff); /* low */ if (status) { dev_err(&port->dev, "%s failed to set baudrate MSB: %d\n", __func__, status); goto reapply_lcr; } status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, (divisor & 0xff00) >> 8); /* high */ if (status) { dev_err(&port->dev, "%s failed to set baudrate LSB: %d\n", __func__, status); } reapply_lcr: status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr & ~UART_LCR_DLAB); if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); } } static int f81232_port_enable(struct usb_serial_port *port) { u8 val; int status; /* fifo on, trigger8, clear TX/RX*/ val = UART_FCR_TRIGGER_8 | UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT; status = f81232_set_register(port, FIFO_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s failed to set FCR: %d\n", __func__, status); return status; } /* MSR Interrupt only, LSR will read from Bulk-in odd byte */ status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, UART_IER_MSI); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static int f81232_port_disable(struct usb_serial_port *port) { int status; status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, 0); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static void f81232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 new_lcr = 0; int status = 0; speed_t baudrate; speed_t old_baud; /* Don't change anything if nothing has changed */ if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios)) return; if (C_BAUD(tty) == B0) f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); baudrate = tty_get_baud_rate(tty); if (baudrate > 0) { if (old_termios) old_baud = tty_termios_baud_rate(old_termios); else old_baud = F81232_DEF_BAUDRATE; f81232_set_baudrate(tty, port, baudrate, old_baud); } if (C_PARENB(tty)) { new_lcr |= UART_LCR_PARITY; if (!C_PARODD(tty)) new_lcr |= UART_LCR_EPAR; if (C_CMSPAR(tty)) new_lcr |= UART_LCR_SPAR; } if (C_CSTOPB(tty)) new_lcr |= UART_LCR_STOP; new_lcr |= UART_LCR_WLEN(tty_get_char_size(tty->termios.c_cflag)); mutex_lock(&priv->lock); new_lcr |= (priv->shadow_lcr & UART_LCR_SBC); status = f81232_set_register(port, LINE_CONTROL_REGISTER, new_lcr); if (status) { dev_err(&port->dev, "%s failed to set LCR: %d\n", __func__, status); } priv->shadow_lcr = new_lcr; mutex_unlock(&priv->lock); } static int f81232_tiocmget(struct tty_struct *tty) { int r; struct usb_serial_port *port = tty->driver_data; struct f81232_private *port_priv = usb_get_serial_port_data(port); u8 mcr, msr; /* force get current MSR changed state */ f81232_read_msr(port); mutex_lock(&port_priv->lock); mcr = port_priv->modem_control; msr = port_priv->modem_status; mutex_unlock(&port_priv->lock); r = (mcr & UART_MCR_DTR ? TIOCM_DTR : 0) | (mcr & UART_MCR_RTS ? TIOCM_RTS : 0) | (msr & UART_MSR_CTS ? TIOCM_CTS : 0) | (msr & UART_MSR_DCD ? TIOCM_CAR : 0) | (msr & UART_MSR_RI ? TIOCM_RI : 0) | (msr & UART_MSR_DSR ? TIOCM_DSR : 0); return r; } static int f81232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return f81232_set_mctrl(port, set, clear); } static int f81232_open(struct tty_struct *tty, struct usb_serial_port *port) { int result; result = f81232_port_enable(port); if (result) return result; /* Setup termios */ if (tty) f81232_set_termios(tty, port, NULL); result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); return result; } result = usb_serial_generic_open(tty, port); if (result) { usb_kill_urb(port->interrupt_in_urb); return result; } return 0; } static int f81534a_open(struct tty_struct *tty, struct usb_serial_port *port) { int status; u8 mask; u8 val; val = F81534A_TRIGGER_MULTIPLE_4X | F81534A_FIFO_128BYTE; mask = F81534A_TRIGGER_MASK | F81534A_FIFO_128BYTE; status = f81232_set_mask_register(port, F81534A_MODE_REG, mask, val); if (status) { dev_err(&port->dev, "failed to set MODE_REG: %d\n", status); return status; } return f81232_open(tty, port); } static void f81232_close(struct usb_serial_port *port) { struct f81232_private *port_priv = usb_get_serial_port_data(port); f81232_port_disable(port); usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } static void f81232_dtr_rts(struct usb_serial_port *port, int on) { if (on) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); else f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); } static bool f81232_tx_empty(struct usb_serial_port *port) { int status; u8 tmp; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (!status) { if ((tmp & UART_LSR_TEMT) != UART_LSR_TEMT) return false; } return true; } static int f81232_carrier_raised(struct usb_serial_port *port) { u8 msr; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); msr = priv->modem_status; mutex_unlock(&priv->lock); if (msr & UART_MSR_DCD) return 1; return 0; } static void f81232_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); ss->baud_base = priv->baud_base; } static void f81232_interrupt_work(struct work_struct *work) { struct f81232_private *priv = container_of(work, struct f81232_private, interrupt_work); f81232_read_msr(priv->port); } static void f81232_lsr_worker(struct work_struct *work) { struct f81232_private *priv; struct usb_serial_port *port; int status; u8 tmp; priv = container_of(work, struct f81232_private, lsr_work); port = priv->port; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (status) dev_warn(&port->dev, "read LSR failed: %d\n", status); } static int f81534a_ctrl_set_register(struct usb_interface *intf, u16 reg, u16 size, void *val) { struct usb_device *dev = interface_to_usbdev(intf); int retry = F81534A_ACCESS_REG_RETRY; int status; while (retry--) { status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, val, size, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { status = usb_translate_errors(status); if (status == -EIO) continue; } break; } if (status) { dev_err(&intf->dev, "failed to set register 0x%x: %d\n", reg, status); } return status; } static int f81534a_ctrl_enable_all_ports(struct usb_interface *intf, bool en) { unsigned char enable[2] = {0}; int status; /* * Enable all available serial ports, define as following: * bit 15 : Reset behavior (when HUB got soft reset) * 0: maintain all serial port enabled state. * 1: disable all serial port. * bit 0~11 : Serial port enable bit. */ if (en) { enable[0] = 0xff; enable[1] = 0x8f; } status = f81534a_ctrl_set_register(intf, F81534A_CTRL_CMD_ENABLE_PORT, sizeof(enable), enable); if (status) dev_err(&intf->dev, "failed to enable ports: %d\n", status); return status; } static int f81534a_ctrl_probe(struct usb_interface *intf, const struct usb_device_id *id) { return f81534a_ctrl_enable_all_ports(intf, true); } static void f81534a_ctrl_disconnect(struct usb_interface *intf) { f81534a_ctrl_enable_all_ports(intf, false); } static int f81534a_ctrl_resume(struct usb_interface *intf) { return f81534a_ctrl_enable_all_ports(intf, true); } static int f81232_port_probe(struct usb_serial_port *port) { struct f81232_private *priv; priv = devm_kzalloc(&port->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->lock); INIT_WORK(&priv->interrupt_work, f81232_interrupt_work); INIT_WORK(&priv->lsr_work, f81232_lsr_worker); usb_set_serial_port_data(port, priv); priv->port = port; return 0; } static int f81534a_port_probe(struct usb_serial_port *port) { int status; /* tri-state with pull-high, default RS232 Mode */ status = f81232_set_register(port, F81534A_GPIO_REG, F81534A_GPIO_MODE2_DIR); if (status) return status; return f81232_port_probe(port); } static int f81232_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_serial_port *port = serial->port[0]; struct f81232_private *port_priv = usb_get_serial_port_data(port); int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_kill_urb(port->read_urbs[i]); usb_kill_urb(port->interrupt_in_urb); if (port_priv) { flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } return 0; } static int f81232_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; int result; if (tty_port_initialized(&port->port)) { result = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); if (result) { dev_err(&port->dev, "submit interrupt urb failed: %d\n", result); return result; } } return usb_serial_generic_resume(serial); } static struct usb_serial_driver f81232_device = { .driver = { .name = "f81232", }, .id_table = f81232_id_table, .num_ports = 1, .bulk_in_size = 256, .bulk_out_size = 256, .open = f81232_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81232_process_read_urb, .read_int_callback = f81232_read_int_callback, .port_probe = f81232_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver f81534a_device = { .driver = { .name = "f81534a", }, .id_table = f81534a_id_table, .num_ports = 1, .open = f81534a_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81534a_process_read_urb, .read_int_callback = f81232_read_int_callback, .port_probe = f81534a_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &f81232_device, &f81534a_device, NULL, }; static struct usb_driver f81534a_ctrl_driver = { .name = "f81534a_ctrl", .id_table = f81534a_ctrl_id_table, .probe = f81534a_ctrl_probe, .disconnect = f81534a_ctrl_disconnect, .resume = f81534a_ctrl_resume, }; static int __init f81232_init(void) { int status; status = usb_register_driver(&f81534a_ctrl_driver, THIS_MODULE, KBUILD_MODNAME); if (status) return status; status = usb_serial_register_drivers(serial_drivers, KBUILD_MODNAME, combined_id_table); if (status) { usb_deregister(&f81534a_ctrl_driver); return status; } return 0; } static void __exit f81232_exit(void) { usb_serial_deregister_drivers(serial_drivers); usb_deregister(&f81534a_ctrl_driver); } module_init(f81232_init); module_exit(f81232_exit); MODULE_DESCRIPTION("Fintek F81232/532A/534A/535/536 USB to serial driver"); MODULE_AUTHOR("Greg Kroah-Hartman <gregkh@linuxfoundation.org>"); MODULE_AUTHOR("Peter Hong <peter_hong@fintek.com.tw>"); MODULE_LICENSE("GPL v2");
246 8 8 24 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 // SPDX-License-Identifier: GPL-2.0 /* * Creating audit events from TTY input. * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * * Authors: Miloslav Trmac <mitr@redhat.com> */ #include <linux/audit.h> #include <linux/slab.h> #include <linux/tty.h> #include "tty.h" struct tty_audit_buf { struct mutex mutex; /* Protects all data below */ dev_t dev; /* The TTY which the data is from */ bool icanon; size_t valid; u8 *data; /* Allocated size N_TTY_BUF_SIZE */ }; static struct tty_audit_buf *tty_audit_buf_ref(void) { struct tty_audit_buf *buf; buf = current->signal->tty_audit_buf; WARN_ON(buf == ERR_PTR(-ESRCH)); return buf; } static struct tty_audit_buf *tty_audit_buf_alloc(void) { struct tty_audit_buf *buf; buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) goto err; buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; mutex_init(&buf->mutex); return buf; err_buf: kfree(buf); err: return NULL; } static void tty_audit_buf_free(struct tty_audit_buf *buf) { WARN_ON(buf->valid != 0); kfree(buf->data); kfree(buf); } static void tty_audit_log(const char *description, dev_t dev, const u8 *data, size_t size) { struct audit_buffer *ab; pid_t pid = task_pid_nr(current); uid_t uid = from_kuid(&init_user_ns, task_uid(current)); uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current)); unsigned int sessionid = audit_get_sessionid(current); char name[TASK_COMM_LEN]; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_TTY); if (!ab) return; audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u major=%d minor=%d comm=", description, pid, uid, loginuid, sessionid, MAJOR(dev), MINOR(dev)); get_task_comm(name, current); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); audit_log_n_hex(ab, data, size); audit_log_end(ab); } /* * tty_audit_buf_push - Push buffered data out * * Generate an audit message from the contents of @buf, which is owned by * the current task. @buf->mutex must be locked. */ static void tty_audit_buf_push(struct tty_audit_buf *buf) { if (buf->valid == 0) return; if (audit_enabled == AUDIT_OFF) { buf->valid = 0; return; } tty_audit_log("tty", buf->dev, buf->data, buf->valid); buf->valid = 0; } /** * tty_audit_exit - Handle a task exit * * Make sure all buffered data is written out and deallocate the buffer. * Only needs to be called if current->signal->tty_audit_buf != %NULL. * * The process is single-threaded at this point; no other threads share * current->signal. */ void tty_audit_exit(void) { struct tty_audit_buf *buf; buf = xchg(&current->signal->tty_audit_buf, ERR_PTR(-ESRCH)); if (!buf) return; tty_audit_buf_push(buf); tty_audit_buf_free(buf); } /* * tty_audit_fork - Copy TTY audit state for a new task * * Set up TTY audit state in @sig from current. @sig needs no locking. */ void tty_audit_fork(struct signal_struct *sig) { sig->audit_tty = current->signal->audit_tty; } /* * tty_audit_tiocsti - Log TIOCSTI */ void tty_audit_tiocsti(const struct tty_struct *tty, u8 ch) { dev_t dev; dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (tty_audit_push()) return; if (audit_enabled) tty_audit_log("ioctl=TIOCSTI", dev, &ch, 1); } /* * tty_audit_push - Flush current's pending audit data * * Returns 0 if success, -EPERM if tty audit is disabled */ int tty_audit_push(void) { struct tty_audit_buf *buf; if (~current->signal->audit_tty & AUDIT_TTY_ENABLE) return -EPERM; buf = tty_audit_buf_ref(); if (!IS_ERR_OR_NULL(buf)) { mutex_lock(&buf->mutex); tty_audit_buf_push(buf); mutex_unlock(&buf->mutex); } return 0; } /* * tty_audit_buf_get - Get an audit buffer. * * Get an audit buffer, allocate it if necessary. Return %NULL * if out of memory or ERR_PTR(-ESRCH) if tty_audit_exit() has already * occurred. Otherwise, return a new reference to the buffer. */ static struct tty_audit_buf *tty_audit_buf_get(void) { struct tty_audit_buf *buf; buf = tty_audit_buf_ref(); if (buf) return buf; buf = tty_audit_buf_alloc(); if (buf == NULL) { audit_log_lost("out of memory in TTY auditing"); return NULL; } /* Race to use this buffer, free it if another wins */ if (cmpxchg(&current->signal->tty_audit_buf, NULL, buf) != NULL) tty_audit_buf_free(buf); return tty_audit_buf_ref(); } /* * tty_audit_add_data - Add data for TTY auditing. * * Audit @data of @size from @tty, if necessary. */ void tty_audit_add_data(const struct tty_struct *tty, const void *data, size_t size) { struct tty_audit_buf *buf; unsigned int audit_tty; bool icanon = L_ICANON(tty); dev_t dev; audit_tty = READ_ONCE(current->signal->audit_tty); if (~audit_tty & AUDIT_TTY_ENABLE) return; if (unlikely(size == 0)) return; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) return; if ((~audit_tty & AUDIT_TTY_LOG_PASSWD) && icanon && !L_ECHO(tty)) return; buf = tty_audit_buf_get(); if (IS_ERR_OR_NULL(buf)) return; mutex_lock(&buf->mutex); dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (buf->dev != dev || buf->icanon != icanon) { tty_audit_buf_push(buf); buf->dev = dev; buf->icanon = icanon; } do { size_t run; run = N_TTY_BUF_SIZE - buf->valid; if (run > size) run = size; memcpy(buf->data + buf->valid, data, run); buf->valid += run; data += run; size -= run; if (buf->valid == N_TTY_BUF_SIZE) tty_audit_buf_push(buf); } while (size != 0); mutex_unlock(&buf->mutex); }
14 14 13 6 9 17 17 33 32 14 17 14 2 27 24 1 2 5 3 2 1 7 5 5 1 11 11 1 1 38 1 28 9 5 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * Copyright (c) 2014 Intel Corporation * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/random.h> #include <linux/smp.h> #include <linux/static_key.h> #include <net/dst.h> #include <net/ip.h> #include <net/sock.h> #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nft_meta.h> #include <net/netfilter/nf_tables_offload.h> #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ #define NFT_META_SECS_PER_MINUTE 60 #define NFT_META_SECS_PER_HOUR 3600 #define NFT_META_SECS_PER_DAY 86400 #define NFT_META_DAYS_PER_WEEK 7 static u8 nft_meta_weekday(void) { time64_t secs = ktime_get_real_seconds(); unsigned int dse; u8 wday; secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest; dse = div_u64(secs, NFT_META_SECS_PER_DAY); wday = (4 + dse) % NFT_META_DAYS_PER_WEEK; return wday; } static u32 nft_meta_hour(time64_t secs) { struct tm tm; time64_to_tm(secs, 0, &tm); return tm.tm_hour * NFT_META_SECS_PER_HOUR + tm.tm_min * NFT_META_SECS_PER_MINUTE + tm.tm_sec; } static noinline_for_stack void nft_meta_get_eval_time(enum nft_meta_keys key, u32 *dest) { switch (key) { case NFT_META_TIME_NS: nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday()); break; case NFT_META_TIME_HOUR: *dest = nft_meta_hour(ktime_get_real_seconds()); break; default: break; } } static noinline bool nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt, u32 *dest) { const struct sk_buff *skb = pkt->skb; switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) nft_reg_store8(dest, PACKET_MULTICAST); else nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { case htons(ETH_P_IP): { int noff = skb_network_offset(skb); struct iphdr *iph, _iph; iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph); if (!iph) return false; if (ipv4_is_multicast(iph->daddr)) nft_reg_store8(dest, PACKET_MULTICAST); else nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); return false; } break; default: WARN_ON_ONCE(1); return false; } return true; } static noinline bool nft_meta_get_eval_skugid(enum nft_meta_keys key, u32 *dest, const struct nft_pktinfo *pkt) { struct sock *sk = skb_to_full_sk(pkt->skb); struct socket *sock; if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) return false; read_lock_bh(&sk->sk_callback_lock); sock = sk->sk_socket; if (!sock || !sock->file) { read_unlock_bh(&sk->sk_callback_lock); return false; } switch (key) { case NFT_META_SKUID: *dest = from_kuid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsuid); break; case NFT_META_SKGID: *dest = from_kgid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsgid); break; default: break; } read_unlock_bh(&sk->sk_callback_lock); return true; } #ifdef CONFIG_CGROUP_NET_CLASSID static noinline bool nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt) { struct sock *sk = skb_to_full_sk(pkt->skb); if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) return false; *dest = sock_cgroup_classid(&sk->sk_cgrp_data); return true; } #endif static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key, u32 *dest, const struct nft_pktinfo *pkt) { const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); switch (key) { case NFT_META_IIFKIND: if (!in || !in->rtnl_link_ops) return false; strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ); break; case NFT_META_OIFKIND: if (!out || !out->rtnl_link_ops) return false; strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; default: return false; } return true; } static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev) { *dest = dev ? dev->ifindex : 0; } static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev) { strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ); } static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev) { if (!dev) return false; nft_reg_store16(dest, dev->type); return true; } static bool nft_meta_store_ifgroup(u32 *dest, const struct net_device *dev) { if (!dev) return false; *dest = dev->group; return true; } static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, const struct nft_pktinfo *pkt) { switch (key) { case NFT_META_IIFNAME: nft_meta_store_ifname(dest, nft_in(pkt)); break; case NFT_META_OIFNAME: nft_meta_store_ifname(dest, nft_out(pkt)); break; case NFT_META_IIF: nft_meta_store_ifindex(dest, nft_in(pkt)); break; case NFT_META_OIF: nft_meta_store_ifindex(dest, nft_out(pkt)); break; case NFT_META_IFTYPE: if (!nft_meta_store_iftype(dest, pkt->skb->dev)) return false; break; case __NFT_META_IIFTYPE: if (!nft_meta_store_iftype(dest, nft_in(pkt))) return false; break; case NFT_META_OIFTYPE: if (!nft_meta_store_iftype(dest, nft_out(pkt))) return false; break; case NFT_META_IIFGROUP: if (!nft_meta_store_ifgroup(dest, nft_in(pkt))) return false; break; case NFT_META_OIFGROUP: if (!nft_meta_store_ifgroup(dest, nft_out(pkt))) return false; break; default: return false; } return true; } #ifdef CONFIG_IP_ROUTE_CLASSID static noinline bool nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest) { const struct dst_entry *dst = skb_dst(skb); if (!dst) return false; *dest = dst->tclassid; return true; } #endif static noinline u32 nft_meta_get_eval_sdif(const struct nft_pktinfo *pkt) { switch (nft_pf(pkt)) { case NFPROTO_IPV4: return inet_sdif(pkt->skb); case NFPROTO_IPV6: return inet6_sdif(pkt->skb); } return 0; } static noinline void nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt) { u32 sdif = nft_meta_get_eval_sdif(pkt); const struct net_device *dev; dev = sdif ? dev_get_by_index_rcu(nft_net(pkt), sdif) : NULL; nft_meta_store_ifname(dest, dev); } void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; u32 *dest = &regs->data[priv->dreg]; switch (priv->key) { case NFT_META_LEN: *dest = skb->len; break; case NFT_META_PROTOCOL: nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; break; case NFT_META_MARK: *dest = skb->mark; break; case NFT_META_IIF: case NFT_META_OIF: case NFT_META_IIFNAME: case NFT_META_OIFNAME: case NFT_META_IIFTYPE: case NFT_META_OIFTYPE: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: if (!nft_meta_get_eval_ifname(priv->key, dest, pkt)) goto err; break; case NFT_META_SKUID: case NFT_META_SKGID: if (!nft_meta_get_eval_skugid(priv->key, dest, pkt)) goto err; break; #ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: if (!nft_meta_get_eval_rtclassid(skb, dest)) goto err; break; #endif #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: *dest = skb->secmark; break; #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { nft_reg_store8(dest, skb->pkt_type); break; } if (!nft_meta_get_eval_pkttype_lo(pkt, dest)) goto err; break; case NFT_META_CPU: *dest = raw_smp_processor_id(); break; #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: if (!nft_meta_get_eval_cgroup(dest, pkt)) goto err; break; #endif case NFT_META_PRANDOM: *dest = get_random_u32(); break; #ifdef CONFIG_XFRM case NFT_META_SECPATH: nft_reg_store8(dest, secpath_exists(skb)); break; #endif case NFT_META_IIFKIND: case NFT_META_OIFKIND: if (!nft_meta_get_eval_kind(priv->key, dest, pkt)) goto err; break; case NFT_META_TIME_NS: case NFT_META_TIME_DAY: case NFT_META_TIME_HOUR: nft_meta_get_eval_time(priv->key, dest); break; case NFT_META_SDIF: *dest = nft_meta_get_eval_sdif(pkt); break; case NFT_META_SDIFNAME: nft_meta_get_eval_sdifname(dest, pkt); break; default: WARN_ON(1); goto err; } return; err: regs->verdict.code = NFT_BREAK; } EXPORT_SYMBOL_GPL(nft_meta_get_eval); void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; u32 *sreg = &regs->data[meta->sreg]; u32 value = *sreg; u8 value8; switch (meta->key) { case NFT_META_MARK: skb->mark = value; break; case NFT_META_PRIORITY: skb->priority = value; break; case NFT_META_PKTTYPE: value8 = nft_reg_load8(sreg); if (skb->pkt_type != value8 && skb_pkt_type_ok(value8) && skb_pkt_type_ok(skb->pkt_type)) skb->pkt_type = value8; break; case NFT_META_NFTRACE: value8 = nft_reg_load8(sreg); skb->nf_trace = !!value8; break; #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: skb->secmark = value; break; #endif default: WARN_ON(1); } } EXPORT_SYMBOL_GPL(nft_meta_set_eval); const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_META_SREG] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(nft_meta_policy); int nft_meta_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: case NFT_META_IIFTYPE: case NFT_META_OIFTYPE: len = sizeof(u16); break; case NFT_META_NFPROTO: case NFT_META_L4PROTO: case NFT_META_LEN: case NFT_META_PRIORITY: case NFT_META_MARK: case NFT_META_IIF: case NFT_META_OIF: case NFT_META_SDIF: case NFT_META_SKUID: case NFT_META_SKGID: #ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: #endif #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif case NFT_META_PKTTYPE: case NFT_META_CPU: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: #endif len = sizeof(u32); break; case NFT_META_IIFNAME: case NFT_META_OIFNAME: case NFT_META_IIFKIND: case NFT_META_OIFKIND: case NFT_META_SDIFNAME: len = IFNAMSIZ; break; case NFT_META_PRANDOM: len = sizeof(u32); break; #ifdef CONFIG_XFRM case NFT_META_SECPATH: len = sizeof(u8); break; #endif case NFT_META_TIME_NS: len = sizeof(u64); break; case NFT_META_TIME_DAY: len = sizeof(u8); break; case NFT_META_TIME_HOUR: len = sizeof(u32); break; default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); static int nft_meta_get_validate_sdif(const struct nft_ctx *ctx) { unsigned int hooks; switch (ctx->family) { case NFPROTO_IPV4: case NFPROTO_IPV6: case NFPROTO_INET: hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD); break; default: return -EOPNOTSUPP; } return nft_chain_validate_hooks(ctx->chain, hooks); } static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx) { #ifdef CONFIG_XFRM unsigned int hooks; switch (ctx->family) { case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; case NFPROTO_IPV4: case NFPROTO_IPV6: case NFPROTO_INET: hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD); break; default: return -EOPNOTSUPP; } return nft_chain_validate_hooks(ctx->chain, hooks); #else return 0; #endif } static int nft_meta_get_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); switch (priv->key) { case NFT_META_SECPATH: return nft_meta_get_validate_xfrm(ctx); case NFT_META_SDIF: case NFT_META_SDIFNAME: return nft_meta_get_validate_sdif(ctx); default: break; } return 0; } int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int hooks; if (priv->key != NFT_META_PKTTYPE) return 0; switch (ctx->family) { case NFPROTO_BRIDGE: hooks = 1 << NF_BR_PRE_ROUTING; break; case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; case NFPROTO_IPV4: case NFPROTO_IPV6: case NFPROTO_INET: hooks = 1 << NF_INET_PRE_ROUTING; break; default: return -EOPNOTSUPP; } return nft_chain_validate_hooks(ctx->chain, hooks); } EXPORT_SYMBOL_GPL(nft_meta_set_validate); int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_MARK: case NFT_META_PRIORITY: #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif len = sizeof(u32); break; case NFT_META_NFTRACE: len = sizeof(u8); break; case NFT_META_PKTTYPE: len = sizeof(u8); break; default: return -EOPNOTSUPP; } priv->len = len; err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; if (priv->key == NFT_META_NFTRACE) static_branch_inc(&nft_trace_enabled); return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); int nft_meta_get_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_meta *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg)) goto nla_put_failure; return 0; nla_put_failure: return -1; } EXPORT_SYMBOL_GPL(nft_meta_get_dump); int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_meta *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg)) goto nla_put_failure; return 0; nla_put_failure: return -1; } EXPORT_SYMBOL_GPL(nft_meta_set_dump); void nft_meta_set_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); if (priv->key == NFT_META_NFTRACE) static_branch_dec(&nft_trace_enabled); } EXPORT_SYMBOL_GPL(nft_meta_set_destroy); static int nft_meta_get_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; switch (priv->key) { case NFT_META_PROTOCOL: NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto, sizeof(__u16), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK); break; case NFT_META_L4PROTO: NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; case NFT_META_IIF: NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, ingress_ifindex, sizeof(__u32), reg); break; case NFT_META_IIFTYPE: NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta, ingress_iftype, sizeof(__u16), reg); break; default: return -EOPNOTSUPP; } return 0; } bool nft_meta_get_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); const struct nft_meta *meta; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } meta = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != meta->key || priv->dreg != meta->dreg) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } EXPORT_SYMBOL_GPL(nft_meta_get_reduce); static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, .init = nft_meta_get_init, .dump = nft_meta_get_dump, .reduce = nft_meta_get_reduce, .validate = nft_meta_get_validate, .offload = nft_meta_get_offload, }; static bool nft_meta_set_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { int i; for (i = 0; i < NFT_REG32_NUM; i++) { if (!track->regs[i].selector) continue; if (track->regs[i].selector->ops != &nft_meta_get_ops) continue; __nft_reg_track_cancel(track, i); } return false; } static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, .reduce = nft_meta_set_reduce, .validate = nft_meta_set_validate, }; static const struct nft_expr_ops * nft_meta_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { if (tb[NFTA_META_KEY] == NULL) return ERR_PTR(-EINVAL); if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) return ERR_PTR(-EINVAL); #if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META) if (ctx->family == NFPROTO_BRIDGE) return ERR_PTR(-EAGAIN); #endif if (tb[NFTA_META_DREG]) return &nft_meta_get_ops; if (tb[NFTA_META_SREG]) return &nft_meta_set_ops; return ERR_PTR(-EINVAL); } static int nft_meta_inner_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) return -EINVAL; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: len = sizeof(u16); break; case NFT_META_L4PROTO: len = sizeof(u32); break; default: return -EOPNOTSUPP; } priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } void nft_meta_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { const struct nft_meta *priv = nft_expr_priv(expr); u32 *dest = &regs->data[priv->dreg]; switch (priv->key) { case NFT_META_PROTOCOL: nft_reg_store16(dest, (__force u16)tun_ctx->llproto); break; case NFT_META_L4PROTO: if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TH)) goto err; nft_reg_store8(dest, tun_ctx->l4proto); break; default: WARN_ON_ONCE(1); goto err; } return; err: regs->verdict.code = NFT_BREAK; } EXPORT_SYMBOL_GPL(nft_meta_inner_eval); static const struct nft_expr_ops nft_meta_inner_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .init = nft_meta_inner_init, .dump = nft_meta_get_dump, /* direct call to nft_meta_inner_eval(). */ }; struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", .select_ops = nft_meta_select_ops, .inner_ops = &nft_meta_inner_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, }; #ifdef CONFIG_NETWORK_SECMARK struct nft_secmark { u32 secid; char *ctx; }; static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = { [NFTA_SECMARK_CTX] = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN }, }; static int nft_secmark_compute_secid(struct nft_secmark *priv) { u32 tmp_secid = 0; int err; err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid); if (err) return err; if (!tmp_secid) return -ENOENT; err = security_secmark_relabel_packet(tmp_secid); if (err) return err; priv->secid = tmp_secid; return 0; } static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_secmark *priv = nft_obj_data(obj); struct sk_buff *skb = pkt->skb; skb->secmark = priv->secid; } static int nft_secmark_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { struct nft_secmark *priv = nft_obj_data(obj); int err; if (tb[NFTA_SECMARK_CTX] == NULL) return -EINVAL; priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL_ACCOUNT); if (!priv->ctx) return -ENOMEM; err = nft_secmark_compute_secid(priv); if (err) { kfree(priv->ctx); return err; } security_secmark_refcount_inc(); return 0; } static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { struct nft_secmark *priv = nft_obj_data(obj); int err; if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx)) return -1; if (reset) { err = nft_secmark_compute_secid(priv); if (err) return err; } return 0; } static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { struct nft_secmark *priv = nft_obj_data(obj); security_secmark_refcount_dec(); kfree(priv->ctx); } static const struct nft_object_ops nft_secmark_obj_ops = { .type = &nft_secmark_obj_type, .size = sizeof(struct nft_secmark), .init = nft_secmark_obj_init, .eval = nft_secmark_obj_eval, .dump = nft_secmark_obj_dump, .destroy = nft_secmark_obj_destroy, }; struct nft_object_type nft_secmark_obj_type __read_mostly = { .type = NFT_OBJECT_SECMARK, .ops = &nft_secmark_obj_ops, .maxattr = NFTA_SECMARK_MAX, .policy = nft_secmark_policy, .owner = THIS_MODULE, }; #endif /* CONFIG_NETWORK_SECMARK */
8 6 8 8 25 25 1 1 1 6 7 1 6 3 2 7 5 2 3 4 3 2 3 3 2 4 7 5 2 7 7 15 15 1 14 8 8 8 8 5 3 2 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/siphash.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/red.h> /* Stochastic Fairness Queuing algorithm. ======================================= Source: Paul E. McKenney "Stochastic Fairness Queuing", IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. Paul E. McKenney "Stochastic Fairness Queuing", "Interworking: Research and Experience", v.2, 1991, p.113-131. See also: M. Shreedhar and George Varghese "Efficient Fair Queuing using Deficit Round Robin", Proc. SIGCOMM 95. This is not the thing that is usually called (W)FQ nowadays. It does not use any timestamp mechanism, but instead processes queues in round-robin order. ADVANTAGE: - It is very cheap. Both CPU and memory requirements are minimal. DRAWBACKS: - "Stochastic" -> It is not 100% fair. When hash collisions occur, several flows are considered as one. - "Round-robin" -> It introduces larger delays than virtual clock based schemes, and should not be used for isolating interactive traffic from non-interactive. It means, that this scheduler should be used as leaf of CBQ or P3, which put interactive traffic to higher priority band. We still need true WFQ for top level CSZ, but using WFQ for the best effort traffic is absolutely pointless: SFQ is superior for this purpose. IMPLEMENTATION: This implementation limits : - maximal queue length per flow to 127 packets. - max mtu to 2^18-1; - max 65408 flows, - number of hash buckets to 65536. It is easy to increase these values, but not in flight. */ #define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ #define SFQ_DEFAULT_FLOWS 128 #define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ #define SFQ_EMPTY_SLOT 0xffff #define SFQ_DEFAULT_HASH_DIVISOR 1024 /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ typedef u16 sfq_index; /* * We dont use pointers to save space. * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH] * are 'pointers' to dep[] array */ struct sfq_head { sfq_index next; sfq_index prev; }; struct sfq_slot { struct sk_buff *skblist_next; struct sk_buff *skblist_prev; sfq_index qlen; /* number of skbs in skblist */ sfq_index next; /* next slot in sfq RR chain */ struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ int allot; /* credit for this slot */ unsigned int backlog; struct red_vars vars; }; struct sfq_sched_data { /* frequently used fields */ int limit; /* limit of total number of packets in this qdisc */ unsigned int divisor; /* number of slots in hash table */ u8 headdrop; u8 maxdepth; /* limit of packets per flow */ siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; struct tcf_proto __rcu *filter_list; struct tcf_block *block; sfq_index *ht; /* Hash table ('divisor' slots) */ struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ struct red_parms *red_parms; struct tc_sfqred_stats stats; struct sfq_slot *tail; /* current slot in round */ struct sfq_head dep[SFQ_MAX_DEPTH + 1]; /* Linked lists of slots, indexed by depth * dep[0] : list of unused flows * dep[1] : list of flows with 1 packet * dep[X] : list of flows with X packets */ unsigned int maxflows; /* number of flows in flows array */ int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ struct timer_list perturb_timer; struct Qdisc *sch; }; /* * sfq_head are either in a sfq_slot or in dep[] array */ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) { if (val < SFQ_MAX_FLOWS) return &q->slots[val].dep; return &q->dep[val - SFQ_MAX_FLOWS]; } static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct sfq_sched_data *q = qdisc_priv(sch); struct tcf_result res; struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); fl = rcu_dereference_bh(q->filter_list); if (!fl) return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; case TC_ACT_SHOT: return 0; } #endif if (TC_H_MIN(res.classid) <= q->divisor) return TC_H_MIN(res.classid); } return 0; } /* * x : slot number [0 .. SFQ_MAX_FLOWS - 1] */ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; struct sfq_slot *slot = &q->slots[x]; int qlen = slot->qlen; p = qlen + SFQ_MAX_FLOWS; n = q->dep[qlen].next; slot->dep.next = n; slot->dep.prev = p; q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ sfq_dep_head(q, n)->prev = x; } #define sfq_unlink(q, x, n, p) \ do { \ n = q->slots[x].dep.next; \ p = q->slots[x].dep.prev; \ sfq_dep_head(q, p)->next = n; \ sfq_dep_head(q, n)->prev = p; \ } while (0) static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; int d; sfq_unlink(q, x, n, p); d = q->slots[x].qlen--; if (n == p && q->cur_depth == d) q->cur_depth--; sfq_link(q, x); } static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; int d; sfq_unlink(q, x, n, p); d = ++q->slots[x].qlen; if (q->cur_depth < d) q->cur_depth = d; sfq_link(q, x); } /* helper functions : might be changed when/if skb use a standard list_head */ /* remove one skb from tail of slot queue */ static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) { struct sk_buff *skb = slot->skblist_prev; slot->skblist_prev = skb->prev; skb->prev->next = (struct sk_buff *)slot; skb->next = skb->prev = NULL; return skb; } /* remove one skb from head of slot queue */ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) { struct sk_buff *skb = slot->skblist_next; slot->skblist_next = skb->next; skb->next->prev = (struct sk_buff *)slot; skb->next = skb->prev = NULL; return skb; } static inline void slot_queue_init(struct sfq_slot *slot) { memset(slot, 0, sizeof(*slot)); slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; } /* add skb to slot queue (tail add) */ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) { skb->prev = slot->skblist_prev; skb->next = (struct sk_buff *)slot; slot->skblist_prev->next = skb; slot->skblist_prev = skb; } static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); sfq_index x, d = q->cur_depth; struct sk_buff *skb; unsigned int len; struct sfq_slot *slot; /* Queue is full! Find the longest slot and drop tail packet from it */ if (d > 1) { x = q->dep[d].next; slot = &q->slots[x]; drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); slot->backlog -= len; sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch, to_free); return len; } if (d == 1) { /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ x = q->tail->next; slot = &q->slots[x]; q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } return 0; } /* Is ECN parameter configured */ static int sfq_prob_mark(const struct sfq_sched_data *q) { return q->flags & TC_RED_ECN; } /* Should packets over max threshold just be marked */ static int sfq_hard_mark(const struct sfq_sched_data *q) { return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN; } static int sfq_headdrop(const struct sfq_sched_data *q) { return q->headdrop; } static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; int ret; struct sk_buff *head; int delta; hash = sfq_classify(skb, sch, &ret); if (hash == 0) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); return ret; } hash--; x = q->ht[hash]; slot = &q->slots[x]; if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop(skb, sch, to_free); q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; slot->backlog = 0; /* should already be 0 anyway... */ red_set_vars(&slot->vars); goto enqueue; } if (q->red_parms) { slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, &slot->vars, slot->backlog); switch (red_action(q->red_parms, &slot->vars, slot->vars.qavg)) { case RED_DONT_MARK: break; case RED_PROB_MARK: qdisc_qstats_overlimit(sch); if (sfq_prob_mark(q)) { /* We know we have at least one packet in queue */ if (sfq_headdrop(q) && INET_ECN_set_ce(slot->skblist_next)) { q->stats.prob_mark_head++; break; } if (INET_ECN_set_ce(skb)) { q->stats.prob_mark++; break; } } q->stats.prob_drop++; goto congestion_drop; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); if (sfq_hard_mark(q)) { /* We know we have at least one packet in queue */ if (sfq_headdrop(q) && INET_ECN_set_ce(slot->skblist_next)) { q->stats.forced_mark_head++; break; } if (INET_ECN_set_ce(skb)) { q->stats.forced_mark++; break; } } q->stats.forced_drop++; goto congestion_drop; } } if (slot->qlen >= q->maxdepth) { congestion_drop: if (!sfq_headdrop(q)) return qdisc_drop(skb, sch, to_free); /* We know we have at least one packet in queue */ head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; slot->backlog -= delta; qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); qdisc_tree_reduce_backlog(sch, 0, delta); return NET_XMIT_CN; } enqueue: qdisc_qstats_backlog_inc(sch, skb); slot->backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ slot->next = x; } else { slot->next = q->tail->next; q->tail->next = x; } /* We put this flow at the end of our flow list. * This might sound unfair for a new flow to wait after old ones, * but we could endup servicing new flows only, and freeze old ones. */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ slot->allot = q->quantum; } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; qlen = slot->qlen; dropped = sfq_drop(sch, to_free); /* Return Congestion Notification only if we dropped a packet * from this flow. */ if (qlen != slot->qlen) { qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN; } /* As we dropped a packet, better let upper stack know this */ qdisc_tree_reduce_backlog(sch, 1, dropped); return NET_XMIT_SUCCESS; } static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; sfq_index a, next_a; struct sfq_slot *slot; /* No active slots */ if (q->tail == NULL) return NULL; next_slot: a = q->tail->next; slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; slot->allot += q->quantum; goto next_slot; } skb = slot_dequeue_head(slot); sfq_dec(q, a); qdisc_bstats_update(sch, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); slot->backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (slot->qlen == 0) { q->ht[slot->hash] = SFQ_EMPTY_SLOT; next_a = slot->next; if (a == next_a) { q->tail = NULL; /* no more active slots */ return skb; } q->tail->next = next_a; } else { slot->allot -= qdisc_pkt_len(skb); } return skb; } static void sfq_reset(struct Qdisc *sch) { struct sk_buff *skb; while ((skb = sfq_dequeue(sch)) != NULL) rtnl_kfree_skbs(skb, skb); } /* * When q->perturbation is changed, we rehash all queued skbs * to avoid OOO (Out Of Order) effects. * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change * counters. */ static void sfq_rehash(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; int i; struct sfq_slot *slot; struct sk_buff_head list; int dropped = 0; unsigned int drop_len = 0; __skb_queue_head_init(&list); for (i = 0; i < q->maxflows; i++) { slot = &q->slots[i]; if (!slot->qlen) continue; while (slot->qlen) { skb = slot_dequeue_head(slot); sfq_dec(q, i); __skb_queue_tail(&list, skb); } slot->backlog = 0; red_set_vars(&slot->vars); q->ht[slot->hash] = SFQ_EMPTY_SLOT; } q->tail = NULL; while ((skb = __skb_dequeue(&list)) != NULL) { unsigned int hash = sfq_hash(q, skb); sfq_index x = q->ht[hash]; slot = &q->slots[x]; if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) { drop: qdisc_qstats_backlog_dec(sch, skb); drop_len += qdisc_pkt_len(skb); kfree_skb(skb); dropped++; continue; } q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; } if (slot->qlen >= q->maxdepth) goto drop; slot_queue_add(slot, skb); if (q->red_parms) slot->vars.qavg = red_calc_qavg(q->red_parms, &slot->vars, slot->backlog); slot->backlog += qdisc_pkt_len(skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ slot->next = x; } else { slot->next = q->tail->next; q->tail->next = x; } q->tail = slot; slot->allot = q->quantum; } } sch->q.qlen -= dropped; qdisc_tree_reduce_backlog(sch, dropped, drop_len); } static void sfq_perturbation(struct timer_list *t) { struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; spinlock_t *root_lock; siphash_key_t nkey; int period; get_random_bytes(&nkey, sizeof(nkey)); rcu_read_lock(); root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); /* q->perturb_period can change under us from * sfq_change() and sfq_destroy(). */ period = READ_ONCE(q->perturb_period); if (period) mod_timer(&q->perturb_timer, jiffies + period); rcu_read_unlock(); } static int sfq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; unsigned int qlen, dropped = 0; struct red_parms *p = NULL; struct sk_buff *to_free = NULL; struct sk_buff *tail = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) ctl_v1 = nla_data(opt); if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; if ((int)ctl->quantum < 0) { NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); return -EINVAL; } if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; } sch_tree_lock(sch); if (ctl->quantum) q->quantum = ctl->quantum; WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); if (ctl->flows) q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { q->divisor = ctl->divisor; q->maxflows = min_t(u32, q->maxflows, q->divisor); } if (ctl_v1) { if (ctl_v1->depth) q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { swap(q->red_parms, p); red_set_parms(q->red_parms, ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Plog, ctl_v1->Scell_log, NULL, ctl_v1->max_P); } q->flags = ctl_v1->flags; q->headdrop = ctl_v1->headdrop; } if (ctl->limit) { q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); q->maxflows = min_t(u32, q->maxflows, q->limit); } qlen = sch->q.qlen; while (sch->q.qlen > q->limit) { dropped += sfq_drop(sch, &to_free); if (!tail) tail = to_free; } rtnl_kfree_skbs(to_free, tail); qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); return 0; } static void *sfq_alloc(size_t sz) { return kvmalloc(sz, GFP_KERNEL); } static void sfq_free(void *addr) { kvfree(addr); } static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); WRITE_ONCE(q->perturb_period, 0); del_timer_sync(&q->perturb_timer); sfq_free(q->ht); sfq_free(q->slots); kfree(q->red_parms); } static int sfq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); int i; int err; q->sch = sch; timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) return err; for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; q->dep[i].prev = i + SFQ_MAX_FLOWS; } q->limit = SFQ_MAX_DEPTH; q->maxdepth = SFQ_MAX_DEPTH; q->cur_depth = 0; q->tail = NULL; q->divisor = SFQ_DEFAULT_HASH_DIVISOR; q->maxflows = SFQ_DEFAULT_FLOWS; q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt, extack); if (err) return err; } q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); if (!q->ht || !q->slots) { /* Note: sfq_destroy() will be called by our caller */ return -ENOMEM; } for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; for (i = 0; i < q->maxflows; i++) { slot_queue_init(&q->slots[i]); sfq_link(q, i); } if (q->limit >= 1) sch->flags |= TCQ_F_CAN_BYPASS; else sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_sfq_qopt_v1 opt; struct red_parms *p = q->red_parms; memset(&opt, 0, sizeof(opt)); opt.v0.quantum = q->quantum; opt.v0.perturb_period = q->perturb_period / HZ; opt.v0.limit = q->limit; opt.v0.divisor = q->divisor; opt.v0.flows = q->maxflows; opt.depth = q->maxdepth; opt.headdrop = q->headdrop; if (p) { opt.qth_min = p->qth_min >> p->Wlog; opt.qth_max = p->qth_max >> p->Wlog; opt.Wlog = p->Wlog; opt.Plog = p->Plog; opt.Scell_log = p->Scell_log; opt.max_P = p->max_P; } memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); opt.flags = q->flags; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; nla_put_failure: nlmsg_trim(skb, b); return -1; } static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; } static unsigned long sfq_find(struct Qdisc *sch, u32 classid) { return 0; } static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { return 0; } static void sfq_unbind(struct Qdisc *q, unsigned long cl) { } static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl, struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); if (cl) return NULL; return q->block; } static int sfq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { tcm->tcm_handle |= TC_H_MIN(cl); return 0; } static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); sfq_index idx = q->ht[cl - 1]; struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; xstats.allot = slot->allot; qs.qlen = slot->qlen; qs.backlog = slot->backlog; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); } static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int i; if (arg->stop) return; for (i = 0; i < q->divisor; i++) { if (q->ht[i] == SFQ_EMPTY_SLOT) { arg->count++; continue; } if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; } } static const struct Qdisc_class_ops sfq_class_ops = { .leaf = sfq_leaf, .find = sfq_find, .tcf_block = sfq_tcf_block, .bind_tcf = sfq_bind, .unbind_tcf = sfq_unbind, .dump = sfq_dump_class, .dump_stats = sfq_dump_class_stats, .walk = sfq_walk, }; static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .cl_ops = &sfq_class_ops, .id = "sfq", .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, .peek = qdisc_peek_dequeued, .init = sfq_init, .reset = sfq_reset, .destroy = sfq_destroy, .change = NULL, .dump = sfq_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("sfq"); static int __init sfq_module_init(void) { return register_qdisc(&sfq_qdisc_ops); } static void __exit sfq_module_exit(void) { unregister_qdisc(&sfq_qdisc_ops); } module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Stochastic Fairness qdisc");
1 4 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <linux/splice.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "splice.h" struct io_splice { struct file *file_out; loff_t off_out; loff_t off_in; u64 len; int splice_fd_in; unsigned int flags; struct io_rsrc_node *rsrc_node; }; static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); sp->rsrc_node = NULL; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_tee_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) return -EINVAL; return __io_splice_prep(req, sqe); } void io_splice_cleanup(struct io_kiocb *req) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); io_put_rsrc_node(req->ctx, sp->rsrc_node); } static struct file *io_splice_get_file(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct io_ring_ctx *ctx = req->ctx; struct io_rsrc_node *node; struct file *file = NULL; if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) return io_file_get_normal(req, sp->splice_fd_in); io_ring_submit_lock(ctx, issue_flags); node = io_rsrc_node_lookup(&ctx->file_table.data, sp->splice_fd_in); if (node) { node->refs++; sp->rsrc_node = node; file = io_slot_file(node); req->flags |= REQ_F_NEED_CLEANUP; } io_ring_submit_unlock(ctx, issue_flags); return file; } int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); sp->off_in = READ_ONCE(sqe->splice_off_in); sp->off_out = READ_ONCE(sqe->off); return __io_splice_prep(req, sqe); } int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; if (sp->len) ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; }
4 2 2 4 4 1 4 4 3 3 10 10 6 6 4 3 1 10 2 8 8 1 2 1 1 14 14 13 14 14 4 1 1 1 1 1 1 1 2 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 // SPDX-License-Identifier: GPL-2.0-only /* * Minimal file system backend for holding eBPF maps and programs, * used by bpf(2) object pinning. * * Authors: * * Daniel Borkmann <daniel@iogearbox.net> */ #include <linux/init.h> #include <linux/magic.h> #include <linux/major.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/kdev_t.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/kstrtox.h> #include "preload/bpf_preload.h" enum bpf_type { BPF_TYPE_UNSPEC = 0, BPF_TYPE_PROG, BPF_TYPE_MAP, BPF_TYPE_LINK, }; static void *bpf_any_get(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: bpf_prog_inc(raw); break; case BPF_TYPE_MAP: bpf_map_inc_with_uref(raw); break; case BPF_TYPE_LINK: bpf_link_inc(raw); break; default: WARN_ON_ONCE(1); break; } return raw; } static void bpf_any_put(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: bpf_prog_put(raw); break; case BPF_TYPE_MAP: bpf_map_put_with_uref(raw); break; case BPF_TYPE_LINK: bpf_link_put(raw); break; default: WARN_ON_ONCE(1); break; } } static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) { void *raw; raw = bpf_map_get_with_uref(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_MAP; return raw; } raw = bpf_prog_get(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_PROG; return raw; } raw = bpf_link_get_from_fd(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_LINK; return raw; } return ERR_PTR(-EINVAL); } static const struct inode_operations bpf_dir_iops; static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode) { struct inode *inode; switch (mode & S_IFMT) { case S_IFDIR: case S_IFREG: case S_IFLNK: break; default: return ERR_PTR(-EINVAL); } inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOSPC); inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); return inode; } static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) { *type = BPF_TYPE_UNSPEC; if (inode->i_op == &bpf_prog_iops) *type = BPF_TYPE_PROG; else if (inode->i_op == &bpf_map_iops) *type = BPF_TYPE_MAP; else if (inode->i_op == &bpf_link_iops) *type = BPF_TYPE_LINK; else return -EACCES; return 0; } static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, struct inode *dir) { d_instantiate(dentry, inode); dget(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &bpf_dir_iops; inode->i_fop = &simple_dir_operations; inc_nlink(inode); inc_nlink(dir); bpf_dentry_finalize(dentry, inode, dir); return 0; } struct map_iter { void *key; bool done; }; static struct map_iter *map_iter(struct seq_file *m) { return m->private; } static struct bpf_map *seq_file_to_map(struct seq_file *m) { return file_inode(m->file)->i_private; } static void map_iter_free(struct map_iter *iter) { if (iter) { kfree(iter->key); kfree(iter); } } static struct map_iter *map_iter_alloc(struct bpf_map *map) { struct map_iter *iter; iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); if (!iter) goto error; iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); if (!iter->key) goto error; return iter; error: map_iter_free(iter); return NULL; } static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct bpf_map *map = seq_file_to_map(m); void *key = map_iter(m)->key; void *prev_key; (*pos)++; if (map_iter(m)->done) return NULL; if (unlikely(v == SEQ_START_TOKEN)) prev_key = NULL; else prev_key = key; rcu_read_lock(); if (map->ops->map_get_next_key(map, prev_key, key)) { map_iter(m)->done = true; key = NULL; } rcu_read_unlock(); return key; } static void *map_seq_start(struct seq_file *m, loff_t *pos) { if (map_iter(m)->done) return NULL; return *pos ? map_iter(m)->key : SEQ_START_TOKEN; } static void map_seq_stop(struct seq_file *m, void *v) { } static int map_seq_show(struct seq_file *m, void *v) { struct bpf_map *map = seq_file_to_map(m); void *key = map_iter(m)->key; if (unlikely(v == SEQ_START_TOKEN)) { seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); seq_puts(m, "# WARNING!! The output format will change\n"); } else { map->ops->map_seq_show_elem(map, key, m); } return 0; } static const struct seq_operations bpffs_map_seq_ops = { .start = map_seq_start, .next = map_seq_next, .show = map_seq_show, .stop = map_seq_stop, }; static int bpffs_map_open(struct inode *inode, struct file *file) { struct bpf_map *map = inode->i_private; struct map_iter *iter; struct seq_file *m; int err; iter = map_iter_alloc(map); if (!iter) return -ENOMEM; err = seq_open(file, &bpffs_map_seq_ops); if (err) { map_iter_free(iter); return err; } m = file->private_data; m->private = iter; return 0; } static int bpffs_map_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; map_iter_free(map_iter(m)); return seq_release(inode, file); } /* bpffs_map_fops should only implement the basic * read operation for a BPF map. The purpose is to * provide a simple user intuitive way to do * "cat bpffs/pathto/a-pinned-map". * * Other operations (e.g. write, lookup...) should be realized by * the userspace tools (e.g. bpftool) through the * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update * interface. */ static const struct file_operations bpffs_map_fops = { .open = bpffs_map_open, .read = seq_read, .release = bpffs_map_release, }; static int bpffs_obj_open(struct inode *inode, struct file *file) { return -EIO; } static const struct file_operations bpffs_obj_fops = { .open = bpffs_obj_open, }; static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, const struct inode_operations *iops, const struct file_operations *fops) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = iops; inode->i_fop = fops; inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); return 0; } static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, &bpffs_obj_fops); } static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) { struct bpf_map *map = arg; return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, bpf_map_support_seq_show(map) ? &bpffs_map_fops : &bpffs_obj_fops); } static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) { struct bpf_link *link = arg; return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, bpf_link_is_iter(link) ? &bpf_iter_fops : &bpffs_obj_fops); } static struct dentry * bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future * extensions. That allows popoulate_bpffs() create special files. */ if ((dir->i_mode & S_IALLUGO) && strchr(dentry->d_name.name, '.')) return ERR_PTR(-EPERM); return simple_lookup(dir, dentry, flags); } static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *target) { char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); struct inode *inode; if (!link) return -ENOMEM; inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); if (IS_ERR(inode)) { kfree(link); return PTR_ERR(inode); } inode->i_op = &simple_symlink_inode_operations; inode->i_link = link; bpf_dentry_finalize(dentry, inode, dir); return 0; } static const struct inode_operations bpf_dir_iops = { .lookup = bpf_lookup, .mkdir = bpf_mkdir, .symlink = bpf_symlink, .rmdir = simple_rmdir, .rename = simple_rename, .link = simple_link, .unlink = simple_unlink, }; /* pin iterator link into bpffs */ static int bpf_iter_link_pin_kernel(struct dentry *parent, const char *name, struct bpf_link *link) { umode_t mode = S_IFREG | S_IRUSR; struct dentry *dentry; int ret; inode_lock(parent->d_inode); dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) { inode_unlock(parent->d_inode); return PTR_ERR(dentry); } ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, &bpf_iter_fops); dput(dentry); inode_unlock(parent->d_inode); return ret; } static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, enum bpf_type type) { struct dentry *dentry; struct inode *dir; struct path path; umode_t mode; int ret; dentry = user_path_create(path_fd, pathname, &path, 0); if (IS_ERR(dentry)) return PTR_ERR(dentry); dir = d_inode(path.dentry); if (dir->i_op != &bpf_dir_iops) { ret = -EPERM; goto out; } mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); ret = security_path_mknod(&path, dentry, mode, 0); if (ret) goto out; switch (type) { case BPF_TYPE_PROG: ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); break; case BPF_TYPE_MAP: ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); break; case BPF_TYPE_LINK: ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); break; default: ret = -EPERM; } out: done_path_create(&path, dentry); return ret; } int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) { enum bpf_type type; void *raw; int ret; raw = bpf_fd_probe_obj(ufd, &type); if (IS_ERR(raw)) return PTR_ERR(raw); ret = bpf_obj_do_pin(path_fd, pathname, raw, type); if (ret != 0) bpf_any_put(raw, type); return ret; } static void *bpf_obj_do_get(int path_fd, const char __user *pathname, enum bpf_type *type, int flags) { struct inode *inode; struct path path; void *raw; int ret; ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); inode = d_backing_inode(path.dentry); ret = path_permission(&path, ACC_MODE(flags)); if (ret) goto out; ret = bpf_inode_type(inode, type); if (ret) goto out; raw = bpf_any_get(inode->i_private, *type); if (!IS_ERR(raw)) touch_atime(&path); path_put(&path); return raw; out: path_put(&path); return ERR_PTR(ret); } int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; int f_flags; void *raw; int ret; f_flags = bpf_get_file_flag(flags); if (f_flags < 0) return f_flags; raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); if (IS_ERR(raw)) return PTR_ERR(raw); if (type == BPF_TYPE_PROG) ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) ret = bpf_map_new_fd(raw, f_flags); else if (type == BPF_TYPE_LINK) ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); else return -ENOENT; if (ret < 0) bpf_any_put(raw, type); return ret; } static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); if (ret) return ERR_PTR(ret); if (inode->i_op == &bpf_map_iops) return ERR_PTR(-EINVAL); if (inode->i_op == &bpf_link_iops) return ERR_PTR(-EINVAL); if (inode->i_op != &bpf_prog_iops) return ERR_PTR(-EACCES); prog = inode->i_private; ret = security_bpf_prog(prog); if (ret < 0) return ERR_PTR(ret); if (!bpf_prog_get_ok(prog, &type, false)) return ERR_PTR(-EINVAL); bpf_prog_inc(prog); return prog; } struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { struct bpf_prog *prog; struct path path; int ret = kern_path(name, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); prog = __get_prog_inode(d_backing_inode(path.dentry), type); if (!IS_ERR(prog)) touch_atime(&path); path_put(&path); return prog; } EXPORT_SYMBOL(bpf_prog_get_type_path); struct bpffs_btf_enums { const struct btf *btf; const struct btf_type *cmd_t; const struct btf_type *map_t; const struct btf_type *prog_t; const struct btf_type *attach_t; }; static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) { const struct btf *btf; const struct btf_type *t; const char *name; int i, n; memset(info, 0, sizeof(*info)); btf = bpf_get_btf_vmlinux(); if (IS_ERR(btf)) return PTR_ERR(btf); if (!btf) return -ENOENT; info->btf = btf; for (i = 1, n = btf_nr_types(btf); i < n; i++) { t = btf_type_by_id(btf, i); if (!btf_type_is_enum(t)) continue; name = btf_name_by_offset(btf, t->name_off); if (!name) continue; if (strcmp(name, "bpf_cmd") == 0) info->cmd_t = t; else if (strcmp(name, "bpf_map_type") == 0) info->map_t = t; else if (strcmp(name, "bpf_prog_type") == 0) info->prog_t = t; else if (strcmp(name, "bpf_attach_type") == 0) info->attach_t = t; else continue; if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) return 0; } return -ESRCH; } static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, const char *prefix, const char *str, int *value) { const struct btf_enum *e; const char *name; int i, n, pfx_len = strlen(prefix); *value = 0; if (!btf || !enum_t) return false; for (i = 0, n = btf_vlen(enum_t); i < n; i++) { e = &btf_enum(enum_t)[i]; name = btf_name_by_offset(btf, e->name_off); if (!name || strncasecmp(name, prefix, pfx_len) != 0) continue; /* match symbolic name case insensitive and ignoring prefix */ if (strcasecmp(name + pfx_len, str) == 0) { *value = e->val; return true; } } return false; } static void seq_print_delegate_opts(struct seq_file *m, const char *opt_name, const struct btf *btf, const struct btf_type *enum_t, const char *prefix, u64 delegate_msk, u64 any_msk) { const struct btf_enum *e; bool first = true; const char *name; u64 msk; int i, n, pfx_len = strlen(prefix); delegate_msk &= any_msk; /* clear unknown bits */ if (delegate_msk == 0) return; seq_printf(m, ",%s", opt_name); if (delegate_msk == any_msk) { seq_printf(m, "=any"); return; } if (btf && enum_t) { for (i = 0, n = btf_vlen(enum_t); i < n; i++) { e = &btf_enum(enum_t)[i]; name = btf_name_by_offset(btf, e->name_off); if (!name || strncasecmp(name, prefix, pfx_len) != 0) continue; msk = 1ULL << e->val; if (delegate_msk & msk) { /* emit lower-case name without prefix */ seq_putc(m, first ? '=' : ':'); name += pfx_len; while (*name) { seq_putc(m, tolower(*name)); name++; } delegate_msk &= ~msk; first = false; } } } if (delegate_msk) seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); } /* * Display the mount options in /proc/mounts. */ static int bpf_show_options(struct seq_file *m, struct dentry *root) { struct inode *inode = d_inode(root); umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; struct bpf_mount_opts *opts = root->d_sb->s_fs_info; u64 mask; if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, inode->i_uid)); if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, inode->i_gid)); if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); if (opts->delegate_cmds || opts->delegate_maps || opts->delegate_progs || opts->delegate_attachs) { struct bpffs_btf_enums info; /* ignore errors, fallback to hex */ (void)find_bpffs_btf_enums(&info); mask = (1ULL << __MAX_BPF_CMD) - 1; seq_print_delegate_opts(m, "delegate_cmds", info.btf, info.cmd_t, "BPF_", opts->delegate_cmds, mask); mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; seq_print_delegate_opts(m, "delegate_maps", info.btf, info.map_t, "BPF_MAP_TYPE_", opts->delegate_maps, mask); mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; seq_print_delegate_opts(m, "delegate_progs", info.btf, info.prog_t, "BPF_PROG_TYPE_", opts->delegate_progs, mask); mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; seq_print_delegate_opts(m, "delegate_attachs", info.btf, info.attach_t, "BPF_", opts->delegate_attachs, mask); } return 0; } static void bpf_free_inode(struct inode *inode) { enum bpf_type type; if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); if (!bpf_inode_type(inode, &type)) bpf_any_put(inode->i_private, type); free_inode_nonrcu(inode); } const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, .free_inode = bpf_free_inode, }; enum { OPT_UID, OPT_GID, OPT_MODE, OPT_DELEGATE_CMDS, OPT_DELEGATE_MAPS, OPT_DELEGATE_PROGS, OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { fsparam_u32 ("uid", OPT_UID), fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct bpf_mount_opts *opts = fc->s_fs_info; struct fs_parse_result result; kuid_t uid; kgid_t gid; int opt, err; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { /* We might like to report bad mount options here, but * traditionally we've ignored all mount options, so we'd * better continue to ignore non-existing options for bpf. */ if (opt == -ENOPARAM) { opt = vfs_parse_fs_param_source(fc, param); if (opt != -ENOPARAM) return opt; return 0; } if (opt < 0) return opt; } switch (opt) { case OPT_UID: uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) goto bad_value; /* * The requested uid must be representable in the * filesystem's idmapping. */ if (!kuid_has_mapping(fc->user_ns, uid)) goto bad_value; opts->uid = uid; break; case OPT_GID: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) goto bad_value; /* * The requested gid must be representable in the * filesystem's idmapping. */ if (!kgid_has_mapping(fc->user_ns, gid)) goto bad_value; opts->gid = gid; break; case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; case OPT_DELEGATE_CMDS: case OPT_DELEGATE_MAPS: case OPT_DELEGATE_PROGS: case OPT_DELEGATE_ATTACHS: { struct bpffs_btf_enums info; const struct btf_type *enum_t; const char *enum_pfx; u64 *delegate_msk, msk = 0; char *p, *str; int val; /* ignore errors, fallback to hex */ (void)find_bpffs_btf_enums(&info); switch (opt) { case OPT_DELEGATE_CMDS: delegate_msk = &opts->delegate_cmds; enum_t = info.cmd_t; enum_pfx = "BPF_"; break; case OPT_DELEGATE_MAPS: delegate_msk = &opts->delegate_maps; enum_t = info.map_t; enum_pfx = "BPF_MAP_TYPE_"; break; case OPT_DELEGATE_PROGS: delegate_msk = &opts->delegate_progs; enum_t = info.prog_t; enum_pfx = "BPF_PROG_TYPE_"; break; case OPT_DELEGATE_ATTACHS: delegate_msk = &opts->delegate_attachs; enum_t = info.attach_t; enum_pfx = "BPF_"; break; default: return -EINVAL; } str = param->string; while ((p = strsep(&str, ":"))) { if (strcmp(p, "any") == 0) { msk |= ~0ULL; } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { msk |= 1ULL << val; } else { err = kstrtou64(p, 0, &msk); if (err) return err; } } /* Setting delegation mount options requires privileges */ if (msk && !capable(CAP_SYS_ADMIN)) return -EPERM; *delegate_msk |= msk; break; } default: /* ignore unknown mount options */ break; } return 0; bad_value: return invalfc(fc, "Bad value for '%s'", param->key); } struct bpf_preload_ops *bpf_preload_ops; EXPORT_SYMBOL_GPL(bpf_preload_ops); static bool bpf_preload_mod_get(void) { /* If bpf_preload.ko wasn't loaded earlier then load it now. * When bpf_preload is built into vmlinux the module's __init * function will populate it. */ if (!bpf_preload_ops) { request_module("bpf_preload"); if (!bpf_preload_ops) return false; } /* And grab the reference, so the module doesn't disappear while the * kernel is interacting with the kernel module and its UMD. */ if (!try_module_get(bpf_preload_ops->owner)) { pr_err("bpf_preload module get failed.\n"); return false; } return true; } static void bpf_preload_mod_put(void) { if (bpf_preload_ops) /* now user can "rmmod bpf_preload" if necessary */ module_put(bpf_preload_ops->owner); } static DEFINE_MUTEX(bpf_preload_lock); static int populate_bpffs(struct dentry *parent) { struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; int err = 0, i; /* grab the mutex to make sure the kernel interactions with bpf_preload * are serialized */ mutex_lock(&bpf_preload_lock); /* if bpf_preload.ko wasn't built into vmlinux then load it */ if (!bpf_preload_mod_get()) goto out; err = bpf_preload_ops->preload(objs); if (err) goto out_put; for (i = 0; i < BPF_PRELOAD_LINKS; i++) { bpf_link_inc(objs[i].link); err = bpf_iter_link_pin_kernel(parent, objs[i].link_name, objs[i].link); if (err) { bpf_link_put(objs[i].link); goto out_put; } } out_put: bpf_preload_mod_put(); out: mutex_unlock(&bpf_preload_lock); return err; } static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; struct bpf_mount_opts *opts = sb->s_fs_info; struct inode *inode; int ret; /* Mounting an instance of BPF FS requires privileges */ if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) return -EPERM; ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; sb->s_op = &bpf_super_ops; inode = sb->s_root->d_inode; inode->i_uid = opts->uid; inode->i_gid = opts->gid; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; populate_bpffs(sb->s_root); inode->i_mode |= S_ISVTX | opts->mode; return 0; } static int bpf_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, bpf_fill_super); } static void bpf_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations bpf_context_ops = { .free = bpf_free_fc, .parse_param = bpf_parse_param, .get_tree = bpf_get_tree, }; /* * Set up the filesystem mount context. */ static int bpf_init_fs_context(struct fs_context *fc) { struct bpf_mount_opts *opts; opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); if (!opts) return -ENOMEM; opts->mode = S_IRWXUGO; opts->uid = current_fsuid(); opts->gid = current_fsgid(); /* start out with no BPF token delegation enabled */ opts->delegate_cmds = 0; opts->delegate_maps = 0; opts->delegate_progs = 0; opts->delegate_attachs = 0; fc->s_fs_info = opts; fc->ops = &bpf_context_ops; return 0; } static void bpf_kill_super(struct super_block *sb) { struct bpf_mount_opts *opts = sb->s_fs_info; kill_litter_super(sb); kfree(opts); } static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, .kill_sb = bpf_kill_super, .fs_flags = FS_USERNS_MOUNT, }; static int __init bpf_init(void) { int ret; ret = sysfs_create_mount_point(fs_kobj, "bpf"); if (ret) return ret; ret = register_filesystem(&bpf_fs_type); if (ret) sysfs_remove_mount_point(fs_kobj, "bpf"); return ret; } fs_initcall(bpf_init);
20 20 20 20 20 20 28 25 25 28 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 // SPDX-License-Identifier: GPL-2.0 /* * Componentized device handling. */ #include <linux/component.h> #include <linux/device.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/debugfs.h> /** * DOC: overview * * The component helper allows drivers to collect a pile of sub-devices, * including their bound drivers, into an aggregate driver. Various subsystems * already provide functions to get hold of such components, e.g. * of_clk_get_by_name(). The component helper can be used when such a * subsystem-specific way to find a device is not available: The component * helper fills the niche of aggregate drivers for specific hardware, where * further standardization into a subsystem would not be practical. The common * example is when a logical device (e.g. a DRM display driver) is spread around * the SoC on various components (scanout engines, blending blocks, transcoders * for various outputs and so on). * * The component helper also doesn't solve runtime dependencies, e.g. for system * suspend and resume operations. See also :ref:`device links<device_link>`. * * Components are registered using component_add() and unregistered with * component_del(), usually from the driver's probe and disconnect functions. * * Aggregate drivers first assemble a component match list of what they need * using component_match_add(). This is then registered as an aggregate driver * using component_master_add_with_match(), and unregistered using * component_master_del(). */ struct component; struct component_match_array { void *data; int (*compare)(struct device *, void *); int (*compare_typed)(struct device *, int, void *); void (*release)(struct device *, void *); struct component *component; bool duplicate; }; struct component_match { size_t alloc; size_t num; struct component_match_array *compare; }; struct aggregate_device { struct list_head node; bool bound; const struct component_master_ops *ops; struct device *parent; struct component_match *match; }; struct component { struct list_head node; struct aggregate_device *adev; bool bound; const struct component_ops *ops; int subcomponent; struct device *dev; }; static DEFINE_MUTEX(component_mutex); static LIST_HEAD(component_list); static LIST_HEAD(aggregate_devices); #ifdef CONFIG_DEBUG_FS static struct dentry *component_debugfs_dir; static int component_devices_show(struct seq_file *s, void *data) { struct aggregate_device *m = s->private; struct component_match *match = m->match; size_t i; mutex_lock(&component_mutex); seq_printf(s, "%-40s %20s\n", "aggregate_device name", "status"); seq_puts(s, "-------------------------------------------------------------\n"); seq_printf(s, "%-40s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); seq_printf(s, "%-40s %20s\n", "device name", "status"); seq_puts(s, "-------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; seq_printf(s, "%-40s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? "bound" : "not bound") : "not registered"); } mutex_unlock(&component_mutex); return 0; } DEFINE_SHOW_ATTRIBUTE(component_devices); static int __init component_debug_init(void) { component_debugfs_dir = debugfs_create_dir("device_component", NULL); return 0; } core_initcall(component_debug_init); static void component_debugfs_add(struct aggregate_device *m) { debugfs_create_file(dev_name(m->parent), 0444, component_debugfs_dir, m, &component_devices_fops); } static void component_debugfs_del(struct aggregate_device *m) { debugfs_lookup_and_remove(dev_name(m->parent), component_debugfs_dir); } #else static void component_debugfs_add(struct aggregate_device *m) { } static void component_debugfs_del(struct aggregate_device *m) { } #endif static struct aggregate_device *__aggregate_find(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *m; list_for_each_entry(m, &aggregate_devices, node) if (m->parent == parent && (!ops || m->ops == ops)) return m; return NULL; } static struct component *find_component(struct aggregate_device *adev, struct component_match_array *mc) { struct component *c; list_for_each_entry(c, &component_list, node) { if (c->adev && c->adev != adev) continue; if (mc->compare && mc->compare(c->dev, mc->data)) return c; if (mc->compare_typed && mc->compare_typed(c->dev, c->subcomponent, mc->data)) return c; } return NULL; } static int find_components(struct aggregate_device *adev) { struct component_match *match = adev->match; size_t i; int ret = 0; /* * Scan the array of match functions and attach * any components which are found to this adev. */ for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; struct component *c; dev_dbg(adev->parent, "Looking for component %zu\n", i); if (match->compare[i].component) continue; c = find_component(adev, mc); if (!c) { ret = -ENXIO; break; } dev_dbg(adev->parent, "found component %s, duplicate %u\n", dev_name(c->dev), !!c->adev); /* Attach this component to the adev */ match->compare[i].duplicate = !!c->adev; match->compare[i].component = c; c->adev = adev; } return ret; } /* Detach component from associated aggregate_device */ static void remove_component(struct aggregate_device *adev, struct component *c) { size_t i; /* Detach the component from this adev. */ for (i = 0; i < adev->match->num; i++) if (adev->match->compare[i].component == c) adev->match->compare[i].component = NULL; } /* * Try to bring up an aggregate device. If component is NULL, we're interested * in this aggregate device, otherwise it's a component which must be present * to try and bring up the aggregate device. * * Returns 1 for successful bringup, 0 if not ready, or -ve errno. */ static int try_to_bring_up_aggregate_device(struct aggregate_device *adev, struct component *component) { int ret; dev_dbg(adev->parent, "trying to bring up adev\n"); if (find_components(adev)) { dev_dbg(adev->parent, "master has incomplete components\n"); return 0; } if (component && component->adev != adev) { dev_dbg(adev->parent, "master is not for this component (%s)\n", dev_name(component->dev)); return 0; } if (!devres_open_group(adev->parent, adev, GFP_KERNEL)) return -ENOMEM; /* Found all components */ ret = adev->ops->bind(adev->parent); if (ret < 0) { devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_info(adev->parent, "adev bind failed: %d\n", ret); return ret; } devres_close_group(adev->parent, NULL); adev->bound = true; return 1; } static int try_to_bring_up_masters(struct component *component) { struct aggregate_device *adev; int ret = 0; list_for_each_entry(adev, &aggregate_devices, node) { if (!adev->bound) { ret = try_to_bring_up_aggregate_device(adev, component); if (ret != 0) break; } } return ret; } static void take_down_aggregate_device(struct aggregate_device *adev) { if (adev->bound) { adev->ops->unbind(adev->parent); devres_release_group(adev->parent, adev); adev->bound = false; } } /** * component_compare_of - A common component compare function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device of_node. e.g. * component_match_add_release(masterdev, &match, component_release_of, * component_compare_of, component_dev_of_node) */ int component_compare_of(struct device *dev, void *data) { return device_match_of_node(dev, data); } EXPORT_SYMBOL_GPL(component_compare_of); /** * component_release_of - A common component release function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * About the example, Please see component_compare_of(). */ void component_release_of(struct device *dev, void *data) { of_node_put(data); } EXPORT_SYMBOL_GPL(component_release_of); /** * component_compare_dev - A common component compare function for dev * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is struce device. e.g. * component_match_add(masterdev, &match, component_compare_dev, component_dev) */ int component_compare_dev(struct device *dev, void *data) { return dev == data; } EXPORT_SYMBOL_GPL(component_compare_dev); /** * component_compare_dev_name - A common component compare function for device name * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device name string. e.g. * component_match_add(masterdev, &match, component_compare_dev_name, * "component_dev_name") */ int component_compare_dev_name(struct device *dev, void *data) { return device_match_name(dev, data); } EXPORT_SYMBOL_GPL(component_compare_dev_name); static void devm_component_match_release(struct device *parent, void *res) { struct component_match *match = res; unsigned int i; for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; if (mc->release) mc->release(parent, mc->data); } kfree(match->compare); } static int component_match_realloc(struct component_match *match, size_t num) { struct component_match_array *new; if (match->alloc == num) return 0; new = kmalloc_array(num, sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; if (match->compare) { memcpy(new, match->compare, sizeof(*new) * min(match->num, num)); kfree(match->compare); } match->compare = new; match->alloc = num; return 0; } static void __component_match_add(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), int (*compare_typed)(struct device *, int, void *), void *compare_data) { struct component_match *match = *matchptr; if (IS_ERR(match)) return; if (!match) { match = devres_alloc(devm_component_match_release, sizeof(*match), GFP_KERNEL); if (!match) { *matchptr = ERR_PTR(-ENOMEM); return; } devres_add(parent, match); *matchptr = match; } if (match->num == match->alloc) { size_t new_size = match->alloc + 16; int ret; ret = component_match_realloc(match, new_size); if (ret) { *matchptr = ERR_PTR(ret); return; } } match->compare[match->num].compare = compare; match->compare[match->num].compare_typed = compare_typed; match->compare[match->num].release = release; match->compare[match->num].data = compare_data; match->compare[match->num].component = NULL; match->num++; } /** * component_match_add_release - add a component match entry with release callback * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @release: release function for @compare_data * @compare: compare function to match against all components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add(). * * The allocated match list in @matchptr is automatically released using devm * actions, where upon @release will be called to free any references held by * @compare_data, e.g. when @compare_data is a &device_node that must be * released with of_node_put(). * * See also component_match_add() and component_match_add_typed(). */ void component_match_add_release(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), void *compare_data) { __component_match_add(parent, matchptr, release, compare, NULL, compare_data); } EXPORT_SYMBOL(component_match_add_release); /** * component_match_add_typed - add a component match entry for a typed component * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @compare_typed: compare function to match against all typed components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add_typed(). * * The allocated match list in @matchptr is automatically released using devm * actions. * * See also component_match_add_release() and component_match_add_typed(). */ void component_match_add_typed(struct device *parent, struct component_match **matchptr, int (*compare_typed)(struct device *, int, void *), void *compare_data) { __component_match_add(parent, matchptr, NULL, NULL, compare_typed, compare_data); } EXPORT_SYMBOL(component_match_add_typed); static void free_aggregate_device(struct aggregate_device *adev) { struct component_match *match = adev->match; int i; component_debugfs_del(adev); list_del(&adev->node); if (match) { for (i = 0; i < match->num; i++) { struct component *c = match->compare[i].component; if (c) c->adev = NULL; } } kfree(adev); } /** * component_master_add_with_match - register an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * @match: component match list for the aggregate driver * * Registers a new aggregate driver consisting of the components added to @match * by calling one of the component_match_add() functions. Once all components in * @match are available, it will be assembled by calling * &component_master_ops.bind from @ops. Must be unregistered by calling * component_master_del(). */ int component_master_add_with_match(struct device *parent, const struct component_master_ops *ops, struct component_match *match) { struct aggregate_device *adev; int ret; /* Reallocate the match array for its true size */ ret = component_match_realloc(match, match->num); if (ret) return ret; adev = kzalloc(sizeof(*adev), GFP_KERNEL); if (!adev) return -ENOMEM; adev->parent = parent; adev->ops = ops; adev->match = match; component_debugfs_add(adev); /* Add to the list of available aggregate devices. */ mutex_lock(&component_mutex); list_add(&adev->node, &aggregate_devices); ret = try_to_bring_up_aggregate_device(adev, NULL); if (ret < 0) free_aggregate_device(adev); mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(component_master_add_with_match); /** * component_master_del - unregister an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * * Unregisters an aggregate driver registered with * component_master_add_with_match(). If necessary the aggregate driver is first * disassembled by calling &component_master_ops.unbind from @ops. */ void component_master_del(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *adev; mutex_lock(&component_mutex); adev = __aggregate_find(parent, ops); if (adev) { take_down_aggregate_device(adev); free_aggregate_device(adev); } mutex_unlock(&component_mutex); } EXPORT_SYMBOL_GPL(component_master_del); static void component_unbind(struct component *component, struct aggregate_device *adev, void *data) { WARN_ON(!component->bound); if (component->ops && component->ops->unbind) component->ops->unbind(component->dev, adev->parent, data); component->bound = false; /* Release all resources claimed in the binding of this component */ devres_release_group(component->dev, component); } /** * component_unbind_all - unbind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Unbinds all components of the aggregate device by passing @data to their * &component_ops.unbind functions. Should be called from * &component_master_ops.unbind. */ void component_unbind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return; /* Unbind components in reverse order */ for (i = adev->match->num; i--; ) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; component_unbind(c, adev, data); } } EXPORT_SYMBOL_GPL(component_unbind_all); static int component_bind(struct component *component, struct aggregate_device *adev, void *data) { int ret; /* * Each component initialises inside its own devres group. * This allows us to roll-back a failed component without * affecting anything else. */ if (!devres_open_group(adev->parent, NULL, GFP_KERNEL)) return -ENOMEM; /* * Also open a group for the device itself: this allows us * to release the resources claimed against the sub-device * at the appropriate moment. */ if (!devres_open_group(component->dev, component, GFP_KERNEL)) { devres_release_group(adev->parent, NULL); return -ENOMEM; } dev_dbg(adev->parent, "binding %s (ops %ps)\n", dev_name(component->dev), component->ops); ret = component->ops->bind(component->dev, adev->parent, data); if (!ret) { component->bound = true; /* * Close the component device's group so that resources * allocated in the binding are encapsulated for removal * at unbind. Remove the group on the DRM device as we * can clean those resources up independently. */ devres_close_group(component->dev, NULL); devres_remove_group(adev->parent, NULL); dev_info(adev->parent, "bound %s (ops %ps)\n", dev_name(component->dev), component->ops); } else { devres_release_group(component->dev, NULL); devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_err(adev->parent, "failed to bind %s (ops %ps): %d\n", dev_name(component->dev), component->ops, ret); } return ret; } /** * component_bind_all - bind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Binds all components of the aggregate @dev by passing @data to their * &component_ops.bind functions. Should be called from * &component_master_ops.bind. */ int component_bind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; int ret = 0; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return -EINVAL; /* Bind components in match order */ for (i = 0; i < adev->match->num; i++) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; ret = component_bind(c, adev, data); if (ret) break; } if (ret != 0) { for (; i > 0; i--) if (!adev->match->compare[i - 1].duplicate) { c = adev->match->compare[i - 1].component; component_unbind(c, adev, data); } } return ret; } EXPORT_SYMBOL_GPL(component_bind_all); static int __component_add(struct device *dev, const struct component_ops *ops, int subcomponent) { struct component *component; int ret; component = kzalloc(sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; component->ops = ops; component->dev = dev; component->subcomponent = subcomponent; dev_dbg(dev, "adding component (ops %ps)\n", ops); mutex_lock(&component_mutex); list_add_tail(&component->node, &component_list); ret = try_to_bring_up_masters(component); if (ret < 0) { if (component->adev) remove_component(component->adev, component); list_del(&component->node); kfree(component); } mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } /** * component_add_typed - register a component * @dev: component device * @ops: component callbacks * @subcomponent: nonzero identifier for subcomponents * * Register a new component for @dev. Functions in @ops will be call when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * @subcomponent must be nonzero and is used to differentiate between multiple * components registered on the same device @dev. These components are match * using component_match_add_typed(). * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add(). */ int component_add_typed(struct device *dev, const struct component_ops *ops, int subcomponent) { if (WARN_ON(subcomponent == 0)) return -EINVAL; return __component_add(dev, ops, subcomponent); } EXPORT_SYMBOL_GPL(component_add_typed); /** * component_add - register a component * @dev: component device * @ops: component callbacks * * Register a new component for @dev. Functions in @ops will be called when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add_typed() for a variant that allows multiple different * components on the same device. */ int component_add(struct device *dev, const struct component_ops *ops) { return __component_add(dev, ops, 0); } EXPORT_SYMBOL_GPL(component_add); /** * component_del - unregister a component * @dev: component device * @ops: component callbacks * * Unregister a component added with component_add(). If the component is bound * into an aggregate driver, this will force the entire aggregate driver, including * all its components, to be unbound. */ void component_del(struct device *dev, const struct component_ops *ops) { struct component *c, *component = NULL; mutex_lock(&component_mutex); list_for_each_entry(c, &component_list, node) if (c->dev == dev && c->ops == ops) { list_del(&c->node); component = c; break; } if (component && component->adev) { take_down_aggregate_device(component->adev); remove_component(component->adev, component); } mutex_unlock(&component_mutex); WARN_ON(!component); kfree(component); } EXPORT_SYMBOL_GPL(component_del);
13 13 1 13 6 1 11 19 19 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 // SPDX-License-Identifier: GPL-2.0 /* * Functions related to generic helpers functions */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/scatterlist.h> #include "blk.h" static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector) { unsigned int discard_granularity = bdev_discard_granularity(bdev); sector_t granularity_aligned_sector; if (bdev_is_partition(bdev)) sector += bdev->bd_start_sect; granularity_aligned_sector = round_up(sector, discard_granularity >> SECTOR_SHIFT); /* * Make sure subsequent bios start aligned to the discard granularity if * it needs to be split. */ if (granularity_aligned_sector != sector) return granularity_aligned_sector - sector; /* * Align the bio size to the discard granularity to make splitting the bio * at discard granularity boundaries easier in the driver if needed. */ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT; } struct bio *blk_alloc_discard_bio(struct block_device *bdev, sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask) { sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector)); struct bio *bio; if (!bio_sects) return NULL; bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask); if (!bio) return NULL; bio->bi_iter.bi_sector = *sector; bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT; *sector += bio_sects; *nr_sects -= bio_sects; /* * We can loop for a long time in here if someone does full device * discards (like mkfs). Be nice and allow us to schedule out to avoid * softlocking if preempt is disabled. */ cond_resched(); return bio; } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop) { struct bio *bio; while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp_mask))) *biop = bio_chain_and_submit(*biop, bio); return 0; } EXPORT_SYMBOL(__blkdev_issue_discard); /** * blkdev_issue_discard - queue a discard * @bdev: blockdev to issue discard for * @sector: start sector * @nr_sects: number of sectors to discard * @gfp_mask: memory allocation flags (for bio_alloc) * * Description: * Issue a discard request for the sectors in question. */ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { struct bio *bio = NULL; struct blk_plug plug; int ret; blk_start_plug(&plug); ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); if (ret == -EOPNOTSUPP) ret = 0; bio_put(bio); } blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(blkdev_issue_discard); static sector_t bio_write_zeroes_limit(struct block_device *bdev) { sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; return min(bdev_write_zeroes_sectors(bdev), (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask); } /* * There is no reliable way for the SCSI subsystem to determine whether a * device supports a WRITE SAME operation without actually performing a write * to media. As a result, write_zeroes is enabled by default and will be * disabled if a zeroing operation subsequently fails. This means that this * queue limit is likely to change at runtime. */ static void __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags, sector_t limit) { while (nr_sects) { unsigned int len = min(nr_sects, limit); struct bio *bio; if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) break; bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask); bio->bi_iter.bi_sector = sector; if (flags & BLKDEV_ZERO_NOUNMAP) bio->bi_opf |= REQ_NOUNMAP; bio->bi_iter.bi_size = len << SECTOR_SHIFT; *biop = bio_chain_and_submit(*biop, bio); nr_sects -= len; sector += len; cond_resched(); } } static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { sector_t limit = bio_write_zeroes_limit(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; blk_start_plug(&plug); __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags, limit); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { bio_await_chain(bio); blk_finish_plug(&plug); return -EINTR; } ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); /* * For some devices there is no non-destructive way to verify whether * WRITE ZEROES is actually supported. These will clear the capability * on an I/O error, in which case we'll turn any error into * "not supported" here. */ if (ret && !bdev_write_zeroes_sectors(bdev)) return -EOPNOTSUPP; return ret; } /* * Convert a number of 512B sectors to a number of pages. * The result is limited to a number of pages that can fit into a BIO. * Also make sure that the result is always at least 1 (page) for the cases * where nr_sects is lower than the number of sectors in a page. */ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) { sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512); return min(pages, (sector_t)BIO_MAX_VECS); } static void __blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned int flags) { while (nr_sects) { unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects); struct bio *bio; bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask); bio->bi_iter.bi_sector = sector; if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) break; do { unsigned int len, added; len = min_t(sector_t, PAGE_SIZE, nr_sects << SECTOR_SHIFT); added = bio_add_page(bio, ZERO_PAGE(0), len, 0); if (added < len) break; nr_sects -= added >> SECTOR_SHIFT; sector += added >> SECTOR_SHIFT; } while (nr_sects); *biop = bio_chain_and_submit(*biop, bio); cond_resched(); } } static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { struct bio *bio = NULL; struct blk_plug plug; int ret = 0; if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; blk_start_plug(&plug); __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { bio_await_chain(bio); blk_finish_plug(&plug); return -EINTR; } ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return ret; } /** * __blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) * @biop: pointer to anchor bio * @flags: controls detailed behavior * * Description: * Zero-fill a block range, either using hardware offload or by explicitly * writing zeroes to the device. * * If a device is using logical block provisioning, the underlying space will * not be released if %flags contains BLKDEV_ZERO_NOUNMAP. * * If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return * -EOPNOTSUPP if no explicit hardware offload for zeroing is provided. */ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) { sector_t limit = bio_write_zeroes_limit(bdev); if (bdev_read_only(bdev)) return -EPERM; if (limit) { __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, biop, flags, limit); } else { if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, biop, flags); } return 0; } EXPORT_SYMBOL(__blkdev_issue_zeroout); /** * blkdev_issue_zeroout - zero-fill a block range * @bdev: blockdev to write * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) * @flags: controls detailed behavior * * Description: * Zero-fill a block range, either using hardware offload or by explicitly * writing zeroes to the device. See __blkdev_issue_zeroout() for the * valid values for %flags. */ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned flags) { int ret; if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1)) return -EINVAL; if (bdev_read_only(bdev)) return -EPERM; if (bdev_write_zeroes_sectors(bdev)) { ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, flags); if (ret != -EOPNOTSUPP) return ret; } return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags); } EXPORT_SYMBOL(blkdev_issue_zeroout); int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp) { sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; /* make sure that "len << SECTOR_SHIFT" doesn't overflow */ if (max_sectors > UINT_MAX >> SECTOR_SHIFT) max_sectors = UINT_MAX >> SECTOR_SHIFT; max_sectors &= ~bs_mask; if (max_sectors == 0) return -EOPNOTSUPP; if ((sector | nr_sects) & bs_mask) return -EINVAL; if (bdev_read_only(bdev)) return -EPERM; blk_start_plug(&plug); while (nr_sects) { unsigned int len = min_t(sector_t, nr_sects, max_sectors); bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = len << SECTOR_SHIFT; sector += len; nr_sects -= len; cond_resched(); } if (bio) { ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(blkdev_issue_secure_erase);
10662 10641 9358 9359 566 568 20 20 853 20 20 20 20 3 20 20 20 20 20 20 20 3 20 18 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 162 160 162 157 132 38 139 140 6 139 6 23 23 139 139 6 139 139 9 32 137 136 138 137 41 41 15 40 1 1 139 137 34 34 140 365 481 487 830 832 277 830 56 829 148 148 829 616 184 239 833 612 567 569 543 171 171 171 565 564 93 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 // SPDX-License-Identifier: GPL-2.0-only /* * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Code partially derived from nft_hash * Rewritten with rehash code from br_multicast plus single list * pointer as suggested by Josh Triplett */ #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> #include <linux/sched.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/rhashtable.h> #include <linux/err.h> #include <linux/export.h> #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U union nested_table { union nested_table __rcu *table; struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) { return rht_head_hashfn(ht, tbl, he, ht->p); } #ifdef CONFIG_PROVE_LOCKING #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { if (!debug_locks) return 1; if (unlikely(tbl->nest)) return 1; return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else #define ASSERT_RHT_MUTEX(HT) #endif static inline union nested_table *nested_table_top( const struct bucket_table *tbl) { /* The top-level bucket entry does not need RCU protection * because it's set at the same time as tbl->nest. */ return (void *)rcu_dereference_protected(tbl->buckets[0], 1); } static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); const unsigned int len = 1 << shift; unsigned int i; ntbl = rcu_dereference_protected(ntbl->table, 1); if (!ntbl) return; if (size > len) { size >>= shift; for (i = 0; i < len; i++) nested_table_free(ntbl + i, size); } kfree(ntbl); } static void nested_bucket_table_free(const struct bucket_table *tbl) { unsigned int size = tbl->size >> tbl->nest; unsigned int len = 1 << tbl->nest; union nested_table *ntbl; unsigned int i; ntbl = nested_table_top(tbl); for (i = 0; i < len; i++) nested_table_free(ntbl + i, size); kfree(ntbl); } static void bucket_table_free(const struct bucket_table *tbl) { if (tbl->nest) nested_bucket_table_free(tbl); kvfree(tbl); } static void bucket_table_free_rcu(struct rcu_head *head) { bucket_table_free(container_of(head, struct bucket_table, rcu)); } static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, bool leaf) { union nested_table *ntbl; int i; ntbl = rcu_dereference(*prev); if (ntbl) return ntbl; ntbl = alloc_hooks_tag(ht->alloc_tag, kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO)); if (ntbl && leaf) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL) return ntbl; /* Raced with another thread. */ kfree(ntbl); return rcu_dereference(*prev); } static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); struct bucket_table *tbl; size_t size; if (nbuckets < (1 << (shift + 1))) return NULL; size = sizeof(*tbl) + sizeof(tbl->buckets[0]); tbl = alloc_hooks_tag(ht->alloc_tag, kmalloc_noprof(size, gfp|__GFP_ZERO)); if (!tbl) return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, false)) { kfree(tbl); return NULL; } tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; return tbl; } static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) { struct bucket_table *tbl = NULL; size_t size; int i; static struct lock_class_key __key; tbl = alloc_hooks_tag(ht->alloc_tag, kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), gfp|__GFP_ZERO, NUMA_NO_NODE)); size = nbuckets; if (tbl == NULL && !gfpflags_allow_blocking(gfp)) { tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); nbuckets = 0; } if (tbl == NULL) return NULL; lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); tbl->size = size; rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) INIT_RHT_NULLS_HEAD(tbl->buckets[i]); return tbl; } static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, struct bucket_table *tbl) { struct bucket_table *new_tbl; do { new_tbl = tbl; tbl = rht_dereference_rcu(tbl->future_tbl, ht); } while (tbl); return new_tbl; } static int rhashtable_rehash_one(struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); int err = -EAGAIN; struct rhash_head *head, *next, *entry; struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; unsigned long flags; if (new_tbl->nest) goto out; err = -ENOENT; rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); if (rht_is_a_nulls(next)) break; pprev = &entry->next; } if (err) goto out; new_hash = head_hashfn(ht, new_tbl, entry); flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags); if (pprev) rcu_assign_pointer(*pprev, next); else /* Need to preserved the bit lock. */ rht_assign_locked(bkt, next); out: return err; } static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); unsigned long flags; int err; if (!bkt) return 0; flags = rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; rht_unlock(old_tbl, bkt, flags); return err; } static int rhashtable_rehash_attach(struct rhashtable *ht, struct bucket_table *old_tbl, struct bucket_table *new_tbl) { /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. * As cmpxchg() provides strong barriers, we do not need * rcu_assign_pointer(). */ if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL, new_tbl) != NULL) return -EEXIST; return 0; } static int rhashtable_rehash_table(struct rhashtable *ht) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl; struct rhashtable_walker *walker; unsigned int old_hash; int err; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) return 0; for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { err = rhashtable_rehash_chain(ht, old_hash); if (err) return err; cond_resched(); } /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. * We do this inside the locked region so that * rhashtable_walk_stop() can use rcu_head_after_call_rcu() * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } static int rhashtable_rehash_alloc(struct rhashtable *ht, struct bucket_table *old_tbl, unsigned int size) { struct bucket_table *new_tbl; int err; ASSERT_RHT_MUTEX(ht); new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); if (err) bucket_table_free(new_tbl); return err; } /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink * * This function shrinks the hash table to fit, i.e., the smallest * size would not cause it to expand right away automatically. * * The caller must ensure that no concurrent resizing occurs by holding * ht->mutex. * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. * * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); unsigned int nelems = atomic_read(&ht->nelems); unsigned int size = 0; if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); if (size < ht->p.min_size) size = ht->p.min_size; if (old_tbl->size <= size) return 0; if (rht_dereference(old_tbl->future_tbl, ht)) return -EEXIST; return rhashtable_rehash_alloc(ht, old_tbl, size); } static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; int err = 0; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) err = rhashtable_shrink(ht); else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); if (!err || err == -EEXIST) { int nerr; nerr = rhashtable_rehash_table(ht); err = err ?: nerr; } mutex_unlock(&ht->mutex); if (err) schedule_work(&ht->run_work); } static int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; unsigned int size; int err; old_tbl = rht_dereference_rcu(ht->tbl, ht); size = tbl->size; err = -EBUSY; if (rht_grow_above_75(ht, tbl)) size *= 2; /* Do not schedule more than one rehash */ else if (old_tbl != tbl) goto fail; err = -ENOMEM; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN); if (new_tbl == NULL) goto fail; err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { bucket_table_free(new_tbl); if (err == -EEXIST) err = 0; } else schedule_work(&ht->run_work); return err; fail: /* Do not fail the insert if someone else did a rehash. */ if (likely(rcu_access_pointer(tbl->future_tbl))) return 0; /* Schedule async rehash to retry allocation in process context. */ if (err == -ENOMEM) schedule_work(&ht->run_work); return err; } static void *rhashtable_lookup_one(struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_head __rcu **pprev = NULL; struct rhash_head *head; int elasticity; elasticity = RHT_ELASTICITY; rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; elasticity--; if (!key || (ht->p.obj_cmpfn ? ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head)))) { pprev = &head->next; continue; } if (!ht->rhlist) return rht_obj(ht, head); list = container_of(obj, struct rhlist_head, rhead); plist = container_of(head, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); if (pprev) rcu_assign_pointer(*pprev, obj); else /* Need to preserve the bit lock */ rht_assign_locked(bkt, obj); return NULL; } if (elasticity <= 0) return ERR_PTR(-EAGAIN); return ERR_PTR(-ENOENT); } static struct bucket_table *rhashtable_insert_one( struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, void *data) { struct bucket_table *new_tbl; struct rhash_head *head; if (!IS_ERR_OR_NULL(data)) return ERR_PTR(-EEXIST); if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) return ERR_CAST(data); new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (new_tbl) return new_tbl; if (PTR_ERR(data) != -ENOENT) return ERR_CAST(data); if (unlikely(rht_grow_above_max(ht, tbl))) return ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { struct rhlist_head *list; list = container_of(obj, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, NULL); } /* bkt is always the head of the list, so it holds * the lock, which we need to preserve */ rht_assign_locked(bkt, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); return NULL; } static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct rhash_head *obj) { struct bucket_table *new_tbl; struct bucket_table *tbl; struct rhash_lock_head __rcu **bkt; unsigned long flags; unsigned int hash; void *data; new_tbl = rcu_dereference(ht->tbl); do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); if (rcu_access_pointer(tbl->future_tbl)) /* Failure is OK */ bkt = rht_bucket_var(tbl, hash); else bkt = rht_bucket_insert(ht, tbl, hash); if (bkt == NULL) { new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { flags = rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); rht_unlock(tbl, bkt, flags); } } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: -EAGAIN); return data; } void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj) { void *data; do { rcu_read_lock(); data = rhashtable_try_insert(ht, key, obj); rcu_read_unlock(); } while (PTR_ERR(data) == -EAGAIN); return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** * rhashtable_walk_enter - Initialise an iterator * @ht: Table to walk over * @iter: Hash table Iterator * * This function prepares a hash table walk. * * Note that if you restart a walk after rhashtable_walk_stop you * may see the same object twice. Also, you may miss objects if * there are removals in between rhashtable_walk_stop and the next * call to rhashtable_walk_start. * * For a completely stable walk you should construct your own data * structure outside the hash table. * * This function may be called from any process context, including * non-preemptable context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) { iter->ht = ht; iter->p = NULL; iter->slot = 0; iter->skip = 0; iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); list_add(&iter->walker.list, &iter->walker.tbl->walkers); spin_unlock(&ht->lock); } EXPORT_SYMBOL_GPL(rhashtable_walk_enter); /** * rhashtable_walk_exit - Free an iterator * @iter: Hash table Iterator * * This function frees resources allocated by rhashtable_walk_enter. */ void rhashtable_walk_exit(struct rhashtable_iter *iter) { spin_lock(&iter->ht->lock); if (iter->walker.tbl) list_del(&iter->walker.list); spin_unlock(&iter->ht->lock); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take * the RCU lock in all cases including when we return an error. So you must * always call rhashtable_walk_stop to clean up. * * Returns zero if successful. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. * * rhashtable_walk_start is defined as an inline variant that returns * void. This is preferred in cases where the caller would ignore * resize events and always continue. */ int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; bool rhlist = ht->rhlist; rcu_read_lock(); spin_lock(&ht->lock); if (iter->walker.tbl) list_del(&iter->walker.list); spin_unlock(&ht->lock); if (iter->end_of_table) return 0; if (!iter->walker.tbl) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); iter->slot = 0; iter->skip = 0; return -EAGAIN; } if (iter->p && !rhlist) { /* * We need to validate that 'p' is still in the table, and * if so, update 'skip' */ struct rhash_head *p; int skip = 0; rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { skip++; if (p == iter->p) { iter->skip = skip; goto found; } } iter->p = NULL; } else if (iter->p && rhlist) { /* Need to validate that 'list' is still in the table, and * if so, update 'skip' and 'p'. */ struct rhash_head *p; struct rhlist_head *list; int skip = 0; rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { for (list = container_of(p, struct rhlist_head, rhead); list; list = rcu_dereference(list->next)) { skip++; if (list == iter->list) { iter->p = p; iter->skip = skip; goto found; } } } iter->p = NULL; } found: return 0; } EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** * __rhashtable_walk_find_next - Find the next element in a table (or the first * one in case of a new walk). * * @iter: Hash table iterator * * Returns the found object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. */ static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; if (!tbl) return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { if (rhlist) { list = container_of(p, struct rhlist_head, rhead); do { if (!skip) goto next; skip--; list = rcu_dereference(list->next); } while (list); continue; } if (!skip) break; skip--; } next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; iter->list = list; return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; } iter->p = NULL; /* Ensure we see any new tables. */ smp_rmb(); iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (iter->walker.tbl) { iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); } else { iter->end_of_table = true; } return NULL; } /** * rhashtable_walk_next - Return the next object and advance the iterator * @iter: Hash table iterator * * Note that you must call rhashtable_walk_stop when you are finished * with the walk. * * Returns the next object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may continue to use it. */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; if (p) { if (!rhlist || !(list = rcu_dereference(list->next))) { p = rcu_dereference(p->next); list = container_of(p, struct rhlist_head, rhead); } if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; iter->list = list; return rht_obj(ht, rhlist ? &list->rhead : p); } /* At the end of this slot, switch to next one and then find * next entry from that point. */ iter->skip = 0; iter->slot++; } return __rhashtable_walk_find_next(iter); } EXPORT_SYMBOL_GPL(rhashtable_walk_next); /** * rhashtable_walk_peek - Return the next object but don't advance the iterator * @iter: Hash table iterator * * Returns the next object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may continue to use it. */ void *rhashtable_walk_peek(struct rhashtable_iter *iter) { struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; if (p) return rht_obj(ht, ht->rhlist ? &list->rhead : p); /* No object found in current iter, find next one in the table. */ if (iter->skip) { /* A nonzero skip value points to the next entry in the table * beyond that last one that was found. Decrement skip so * we find the current value. __rhashtable_walk_find_next * will restore the original value of skip assuming that * the table hasn't changed. */ iter->skip--; } return __rhashtable_walk_find_next(iter); } EXPORT_SYMBOL_GPL(rhashtable_walk_peek); /** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator * * Finish a hash table walk. Does not reset the iterator to the start of the * hash table. */ void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { struct rhashtable *ht; struct bucket_table *tbl = iter->walker.tbl; if (!tbl) goto out; ht = iter->ht; spin_lock(&ht->lock); if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; else list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { size_t retsize; if (params->nelem_hint) retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), (unsigned long)params->min_size); else retsize = max(HASH_DEFAULT_SIZE, (unsigned long)params->min_size); return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) { return jhash2(key, length, seed); } /** * rhashtable_init - initialize a new hash table * @ht: hash table to be initialized * @params: configuration parameters * * Initializes a new hash table based on the provided configuration * parameters. A table can be configured either with a variable or * fixed length key: * * Configuration Example 1: Fixed length keys * struct test_obj { * int key; * void * my_member; * struct rhash_head node; * }; * * struct rhashtable_params params = { * .head_offset = offsetof(struct test_obj, node), * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, * }; * * Configuration Example 2: Variable length keys * struct test_obj { * [...] * struct rhash_head node; * }; * * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * * return [... hash ...]; * } * * struct rhashtable_params params = { * .head_offset = offsetof(struct test_obj, node), * .hashfn = jhash, * .obj_hashfn = my_hash_fn, * }; */ int rhashtable_init_noprof(struct rhashtable *ht, const struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); alloc_tag_record(ht->alloc_tag); if (params->min_size) ht->p.min_size = roundup_pow_of_two(params->min_size); /* Cap total entries at 2^31 to avoid nelems overflow. */ ht->max_elems = 1u << 31; if (params->max_size) { ht->p.max_size = rounddown_pow_of_two(params->max_size); if (ht->p.max_size < ht->max_elems / 2) ht->max_elems = ht->p.max_size * 2; } ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); size = rounded_hashtable_size(&ht->p); ht->key_len = ht->p.key_len; if (!params->hashfn) { ht->p.hashfn = jhash; if (!(ht->key_len & (sizeof(u32) - 1))) { ht->key_len /= sizeof(u32); ht->p.hashfn = rhashtable_jhash2; } } /* * This is api initialization and thus we need to guarantee the * initial rhashtable allocation. Upon failure, retry with the * smallest possible size with __GFP_NOFAIL semantics. */ tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (unlikely(tbl == NULL)) { size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL); } atomic_set(&ht->nelems, 0); RCU_INIT_POINTER(ht->tbl, tbl); INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } EXPORT_SYMBOL_GPL(rhashtable_init_noprof); /** * rhltable_init - initialize a new hash list table * @hlt: hash list table to be initialized * @params: configuration parameters * * Initializes a new hash list table. * * See documentation for rhashtable_init. */ int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params) { int err; err = rhashtable_init_noprof(&hlt->ht, params); hlt->ht.rhlist = true; return err; } EXPORT_SYMBOL_GPL(rhltable_init_noprof); static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, void (*free_fn)(void *ptr, void *arg), void *arg) { struct rhlist_head *list; if (!ht->rhlist) { free_fn(rht_obj(ht, obj), arg); return; } list = container_of(obj, struct rhlist_head, rhead); do { obj = &list->rhead; list = rht_dereference(list->next, ht); free_fn(rht_obj(ht, obj), arg); } while (list); } /** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy * @free_fn: callback to release resources of element * @arg: pointer passed to free_fn * * Stops an eventual async resize. If defined, invokes free_fn for each * element to releasal resources. Please note that RCU protected * readers may still be accessing the elements. Releasing of resources * must occur in a compatible manner. Then frees the bucket array. * * This function will eventually sleep to wait for an async resize * to complete. The caller is responsible that no further write operations * occurs in parallel. */ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; cond_resched(); for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) rhashtable_free_one(ht, pos, free_fn, arg); } } next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); if (next_tbl) { tbl = next_tbl; goto restart; } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); void rhashtable_destroy(struct rhashtable *ht) { return rhashtable_free_and_destroy(ht, NULL, NULL); } EXPORT_SYMBOL_GPL(rhashtable_destroy); struct rhash_lock_head __rcu **__rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; union nested_table *ntbl; ntbl = nested_table_top(tbl); ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash); subhash >>= tbl->nest; while (ntbl && size > (1 << shift)) { index = subhash & ((1 << shift) - 1); ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash); size >>= shift; subhash >>= shift; } if (!ntbl) return NULL; return &ntbl[subhash].bucket; } EXPORT_SYMBOL_GPL(__rht_bucket_nested); struct rhash_lock_head __rcu **rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash) { static struct rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); return __rht_bucket_nested(tbl, hash) ?: &rhnull; } EXPORT_SYMBOL_GPL(rht_bucket_nested); struct rhash_lock_head __rcu **rht_bucket_nested_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; union nested_table *ntbl; ntbl = nested_table_top(tbl); hash >>= tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, size <= (1 << shift)); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); size >>= shift; hash >>= shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, size <= (1 << shift)); } if (!ntbl) return NULL; return &ntbl[hash].bucket; } EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
4 334 5345 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 /* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/idr.h * * 2002-10-18 written by Jim Houston jim.houston@ccur.com * Copyright (C) 2002 by Concurrent Computer Corporation * * Small id to pointer translation service avoiding fixed sized * tables. */ #ifndef __IDR_H__ #define __IDR_H__ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> struct idr { struct radix_tree_root idr_rt; unsigned int idr_base; unsigned int idr_next; }; /* * The IDR API does not expose the tagging functionality of the radix tree * to users. Use tag 0 to track whether a node has free space below it. */ #define IDR_FREE 0 /* Set the IDR flag and the IDR_FREE tag */ #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ (1 << (ROOT_TAG_SHIFT + IDR_FREE))) #define IDR_INIT_BASE(name, base) { \ .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \ .idr_base = (base), \ .idr_next = 0, \ } /** * IDR_INIT() - Initialise an IDR. * @name: Name of IDR. * * A freshly-initialised IDR contains no IDs. */ #define IDR_INIT(name) IDR_INIT_BASE(name, 0) /** * DEFINE_IDR() - Define a statically-allocated IDR. * @name: Name of IDR. * * An IDR defined using this macro is ready for use with no additional * initialisation required. It contains no IDs. */ #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) /** * idr_get_cursor - Return the current position of the cyclic allocator * @idr: idr handle * * The value returned is the value that will be next returned from * idr_alloc_cyclic() if it is free (otherwise the search will start from * this position). */ static inline unsigned int idr_get_cursor(const struct idr *idr) { return READ_ONCE(idr->idr_next); } /** * idr_set_cursor - Set the current position of the cyclic allocator * @idr: idr handle * @val: new position * * The next call to idr_alloc_cyclic() will return @val if it is free * (otherwise the search will start from this position). */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { WRITE_ONCE(idr->idr_next, val); } /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must * ensure calls to this function are made within rcu_read_lock() regions. * Other readers (lock-free or otherwise) and modifications may be running * concurrently. * * It is still required that the caller manage the synchronization and * lifetimes of the items. So if RCU lock-free lookups are used, typically * this would mean that the items have their own locks, or are amenable to * lock-free access; and that the items are freed by RCU (or only freed after * having been deleted from the idr tree *and* a synchronize_rcu() grace * period). */ #define idr_lock(idr) xa_lock(&(idr)->idr_rt) #define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) #define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) #define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) #define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) #define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) #define idr_lock_irqsave(idr, flags) \ xa_lock_irqsave(&(idr)->idr_rt, flags) #define idr_unlock_irqrestore(idr, flags) \ xa_unlock_irqrestore(&(idr)->idr_rt, flags) void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *id, unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *ptr, int start, int end, gfp_t); void *idr_remove(struct idr *, unsigned long id); void *idr_find(const struct idr *, unsigned long id); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. * @base: The base value for the IDR. * * This variation of idr_init() creates an IDR which will allocate IDs * starting at %base. */ static inline void idr_init_base(struct idr *idr, int base) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); idr->idr_base = base; idr->idr_next = 0; } /** * idr_init() - Initialise an IDR. * @idr: IDR handle. * * Initialise a dynamically allocated IDR. To initialise a * statically allocated IDR, use DEFINE_IDR(). */ static inline void idr_init(struct idr *idr) { idr_init_base(idr, 0); } /** * idr_is_empty() - Are there any IDs allocated? * @idr: IDR handle. * * Return: %true if any IDs have been allocated from this IDR. */ static inline bool idr_is_empty(const struct idr *idr) { return radix_tree_empty(&idr->idr_rt) && radix_tree_tagged(&idr->idr_rt, IDR_FREE); } /** * idr_preload_end - end preload section started with idr_preload() * * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ static inline void idr_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } /** * idr_for_each_entry() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U) /** * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. */ #define idr_for_each_entry_continue(idr, entry, id) \ for ((entry) = idr_get_next((idr), &(id)); \ entry; \ ++id, (entry) = idr_get_next((idr), &(id))) /** * idr_for_each_entry_continue_ul() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. * After normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { struct xarray xa; }; #define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) #define IDA_INIT(name) { \ .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); /** * ida_alloc() - Allocate an unused ID. * @ida: IDA handle. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc(struct ida *ida, gfp_t gfp) { return ida_alloc_range(ida, 0, ~0, gfp); } /** * ida_alloc_min() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) { return ida_alloc_range(ida, min, ~0, gfp); } /** * ida_alloc_max() - Allocate an unused ID. * @ida: IDA handle. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and @max, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) { return ida_alloc_range(ida, 0, max, gfp); } static inline void ida_init(struct ida *ida) { xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } /* * ida_simple_get() and ida_simple_remove() are deprecated. Use * ida_alloc() and ida_free() instead respectively. */ #define ida_simple_get(ida, start, end, gfp) \ ida_alloc_range(ida, start, (end) - 1, gfp) #define ida_simple_remove(ida, id) ida_free(ida, id) static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } #endif /* __IDR_H__ */
5 4 1 4 2 3 2 4 4 4 2 2 2 3 2 3 5 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 /* * Poly1305 authenticator algorithm, RFC7539 * * Copyright (C) 2015 Martin Willi * * Based on public domain code by Andrew Moon and Daniel J. Bernstein. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ #include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/unaligned.h> static int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); poly1305_core_init(&dctx->h); dctx->buflen = 0; dctx->rset = 0; dctx->sset = false; return 0; } static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { poly1305_core_setkey(&dctx->core_r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->rset = 2; } if (srclen >= POLY1305_BLOCK_SIZE) { dctx->s[0] = get_unaligned_le32(src + 0); dctx->s[1] = get_unaligned_le32(src + 4); dctx->s[2] = get_unaligned_le32(src + 8); dctx->s[3] = get_unaligned_le32(src + 12); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->sset = true; } } return srclen; } static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { unsigned int datalen; if (unlikely(!dctx->sset)) { datalen = crypto_poly1305_setdesckey(dctx, src, srclen); src += srclen - datalen; srclen = datalen; } poly1305_core_blocks(&dctx->h, &dctx->core_r, src, srclen / POLY1305_BLOCK_SIZE, 1); } static int crypto_poly1305_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); memcpy(dctx->buf + dctx->buflen, src, bytes); src += bytes; srclen -= bytes; dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { poly1305_blocks(dctx, src, srclen); src += srclen - (srclen % POLY1305_BLOCK_SIZE); srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { dctx->buflen = srclen; memcpy(dctx->buf, src, srclen); } return 0; } static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); if (unlikely(!dctx->sset)) return -ENOKEY; poly1305_final_generic(dctx, dst); return 0; } static struct shash_alg poly1305_alg = { .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", .cra_driver_name = "poly1305-generic", .cra_priority = 100, .cra_blocksize = POLY1305_BLOCK_SIZE, .cra_module = THIS_MODULE, }, }; static int __init poly1305_mod_init(void) { return crypto_register_shash(&poly1305_alg); } static void __exit poly1305_mod_exit(void) { crypto_unregister_shash(&poly1305_alg); } subsys_initcall(poly1305_mod_init); module_exit(poly1305_mod_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); MODULE_DESCRIPTION("Poly1305 authenticator"); MODULE_ALIAS_CRYPTO("poly1305"); MODULE_ALIAS_CRYPTO("poly1305-generic");
576 9 587 582 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 // SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - Credential hooks * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI */ #include <linux/cred.h> #include <linux/lsm_hooks.h> #include "common.h" #include "cred.h" #include "ruleset.h" #include "setup.h" static void hook_cred_transfer(struct cred *const new, const struct cred *const old) { struct landlock_ruleset *const old_dom = landlock_cred(old)->domain; if (old_dom) { landlock_get_ruleset(old_dom); landlock_cred(new)->domain = old_dom; } } static int hook_cred_prepare(struct cred *const new, const struct cred *const old, const gfp_t gfp) { hook_cred_transfer(new, old); return 0; } static void hook_cred_free(struct cred *const cred) { struct landlock_ruleset *const dom = landlock_cred(cred)->domain; if (dom) landlock_put_ruleset_deferred(dom); } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), LSM_HOOK_INIT(cred_transfer, hook_cred_transfer), LSM_HOOK_INIT(cred_free, hook_cred_free), }; __init void landlock_add_cred_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); }
759 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SYNC_CORE_H #define _ASM_X86_SYNC_CORE_H #include <linux/preempt.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> #ifdef CONFIG_X86_32 static __always_inline void iret_to_self(void) { asm volatile ( "pushfl\n\t" "pushl %%cs\n\t" "pushl $1f\n\t" "iret\n\t" "1:" : ASM_CALL_CONSTRAINT : : "memory"); } #else static __always_inline void iret_to_self(void) { unsigned int tmp; asm volatile ( "mov %%ss, %0\n\t" "pushq %q0\n\t" "pushq %%rsp\n\t" "addq $8, (%%rsp)\n\t" "pushfq\n\t" "mov %%cs, %0\n\t" "pushq %q0\n\t" "pushq $1f\n\t" "iretq\n\t" "1:" : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); } #endif /* CONFIG_X86_32 */ /* * This function forces the icache and prefetched instruction stream to * catch up with reality in two very specific cases: * * a) Text was modified using one virtual address and is about to be executed * from the same physical page at a different virtual address. * * b) Text was modified on a different CPU, may subsequently be * executed on this CPU, and you want to make sure the new version * gets executed. This generally means you're calling this in an IPI. * * If you're calling this for a different reason, you're probably doing * it wrong. * * Like all of Linux's memory ordering operations, this is a * compiler barrier as well. */ static __always_inline void sync_core(void) { /* * The SERIALIZE instruction is the most straightforward way to * do this, but it is not universally available. */ if (static_cpu_has(X86_FEATURE_SERIALIZE)) { serialize(); return; } /* * For all other processors, there are quite a few ways to do this. * IRET-to-self is nice because it works on every CPU, at any CPL * (so it's compatible with paravirtualization), and it never exits * to a hypervisor. The only downsides are that it's a bit slow * (it seems to be a bit more than 2x slower than the fastest * options) and that it unmasks NMIs. The "push %cs" is needed, * because in paravirtual environments __KERNEL_CS may not be a * valid CS value when we do IRET directly. * * In case NMI unmasking or performance ever becomes a problem, * the next best option appears to be MOV-to-CR2 and an * unconditional jump. That sequence also works on all CPUs, * but it will fault at CPL3 (i.e. Xen PV). * * CPUID is the conventional way, but it's nasty: it doesn't * exist on some 486-like CPUs, and it usually exits to a * hypervisor. */ iret_to_self(); } /* * Ensure that a core serializing instruction is issued before returning * to user-mode. x86 implements return to user-space through sysexit, * sysrel, and sysretq, which are not core serializing. */ static inline void sync_core_before_usermode(void) { /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; /* * Even if we're in an interrupt, we might reschedule before returning, * in which case we could switch to a different thread in the same mm * and return using SYSRET or SYSEXIT. Instead of trying to keep * track of our need to sync the core, just sync right away. */ sync_core(); } #endif /* _ASM_X86_SYNC_CORE_H */
3 3 1 3 1 10 10 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 // SPDX-License-Identifier: GPL-2.0 /* * Copyright 2020 Linaro Limited * * Author: Daniel Lezcano <daniel.lezcano@linaro.org> * * Generic netlink for thermal management framework */ #include <linux/module.h> #include <linux/notifier.h> #include <linux/kernel.h> #include <net/sock.h> #include <net/genetlink.h> #include <uapi/linux/thermal.h> #include "thermal_core.h" static const struct genl_multicast_group thermal_genl_mcgrps[] = { [THERMAL_GENL_SAMPLING_GROUP] = { .name = THERMAL_GENL_SAMPLING_GROUP_NAME, }, [THERMAL_GENL_EVENT_GROUP] = { .name = THERMAL_GENL_EVENT_GROUP_NAME, }, }; static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { /* Thermal zone */ [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING, .len = THERMAL_NAME_LENGTH }, /* Governor(s) */ [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING, .len = THERMAL_NAME_LENGTH }, /* Cooling devices */ [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING, .len = THERMAL_NAME_LENGTH }, /* CPU capabilities */ [THERMAL_GENL_ATTR_CPU_CAPABILITY] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 }, /* Thresholds */ [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, }; struct param { struct nlattr **attrs; struct sk_buff *msg; const char *name; int tz_id; int cdev_id; int trip_id; int trip_temp; int trip_type; int trip_hyst; int temp; int prev_temp; int direction; int cdev_state; int cdev_max_state; struct thermal_genl_cpu_caps *cpu_capabilities; int cpu_capabilities_count; }; typedef int (*cb_t)(struct param *); static struct genl_family thermal_genl_family; static BLOCKING_NOTIFIER_HEAD(thermal_genl_chain); static int thermal_group_has_listeners(enum thermal_genl_multicast_groups group) { return genl_has_listeners(&thermal_genl_family, &init_net, group); } /************************** Sampling encoding *******************************/ int thermal_genl_sampling_temp(int id, int temp) { struct sk_buff *skb; void *hdr; if (!thermal_group_has_listeners(THERMAL_GENL_SAMPLING_GROUP)) return 0; skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_put(skb, 0, 0, &thermal_genl_family, 0, THERMAL_GENL_SAMPLING_TEMP); if (!hdr) goto out_free; if (nla_put_u32(skb, THERMAL_GENL_ATTR_TZ_ID, id)) goto out_cancel; if (nla_put_u32(skb, THERMAL_GENL_ATTR_TZ_TEMP, temp)) goto out_cancel; genlmsg_end(skb, hdr); genlmsg_multicast(&thermal_genl_family, skb, 0, THERMAL_GENL_SAMPLING_GROUP, GFP_KERNEL); return 0; out_cancel: genlmsg_cancel(skb, hdr); out_free: nlmsg_free(skb); return -EMSGSIZE; } /**************************** Event encoding *********************************/ static int thermal_genl_event_tz_create(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_string(p->msg, THERMAL_GENL_ATTR_TZ_NAME, p->name)) return -EMSGSIZE; return 0; } static int thermal_genl_event_tz(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) return -EMSGSIZE; return 0; } static int thermal_genl_event_tz_trip_up(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, p->trip_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TEMP, p->temp)) return -EMSGSIZE; return 0; } static int thermal_genl_event_tz_trip_change(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, p->trip_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, p->trip_type) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_TEMP, p->trip_temp) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_HYST, p->trip_hyst)) return -EMSGSIZE; return 0; } static int thermal_genl_event_cdev_add(struct param *p) { if (nla_put_string(p->msg, THERMAL_GENL_ATTR_CDEV_NAME, p->name) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_CDEV_ID, p->cdev_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_CDEV_MAX_STATE, p->cdev_max_state)) return -EMSGSIZE; return 0; } static int thermal_genl_event_cdev_delete(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_CDEV_ID, p->cdev_id)) return -EMSGSIZE; return 0; } static int thermal_genl_event_cdev_state_update(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_CDEV_ID, p->cdev_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_CDEV_CUR_STATE, p->cdev_state)) return -EMSGSIZE; return 0; } static int thermal_genl_event_gov_change(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_string(p->msg, THERMAL_GENL_ATTR_GOV_NAME, p->name)) return -EMSGSIZE; return 0; } static int thermal_genl_event_cpu_capability_change(struct param *p) { struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities; struct sk_buff *msg = p->msg; struct nlattr *start_cap; int i; start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY); if (!start_cap) return -EMSGSIZE; for (i = 0; i < p->cpu_capabilities_count; ++i) { if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, cpu_cap->cpu)) goto out_cancel_nest; if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, cpu_cap->performance)) goto out_cancel_nest; if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, cpu_cap->efficiency)) goto out_cancel_nest; ++cpu_cap; } nla_nest_end(msg, start_cap); return 0; out_cancel_nest: nla_nest_cancel(msg, start_cap); return -EMSGSIZE; } static int thermal_genl_event_threshold_add(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) return -EMSGSIZE; return 0; } static int thermal_genl_event_threshold_flush(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) return -EMSGSIZE; return 0; } static int thermal_genl_event_threshold_up(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_PREV_TEMP, p->prev_temp) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TEMP, p->temp)) return -EMSGSIZE; return 0; } int thermal_genl_event_tz_delete(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); int thermal_genl_event_tz_enable(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); int thermal_genl_event_tz_disable(struct param *p) __attribute__((alias("thermal_genl_event_tz"))); int thermal_genl_event_tz_trip_down(struct param *p) __attribute__((alias("thermal_genl_event_tz_trip_up"))); int thermal_genl_event_threshold_delete(struct param *p) __attribute__((alias("thermal_genl_event_threshold_add"))); int thermal_genl_event_threshold_down(struct param *p) __attribute__((alias("thermal_genl_event_threshold_up"))); static cb_t event_cb[] = { [THERMAL_GENL_EVENT_TZ_CREATE] = thermal_genl_event_tz_create, [THERMAL_GENL_EVENT_TZ_DELETE] = thermal_genl_event_tz_delete, [THERMAL_GENL_EVENT_TZ_ENABLE] = thermal_genl_event_tz_enable, [THERMAL_GENL_EVENT_TZ_DISABLE] = thermal_genl_event_tz_disable, [THERMAL_GENL_EVENT_TZ_TRIP_UP] = thermal_genl_event_tz_trip_up, [THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = thermal_genl_event_tz_trip_down, [THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = thermal_genl_event_tz_trip_change, [THERMAL_GENL_EVENT_CDEV_ADD] = thermal_genl_event_cdev_add, [THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete, [THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update, [THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change, [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change, [THERMAL_GENL_EVENT_THRESHOLD_ADD] = thermal_genl_event_threshold_add, [THERMAL_GENL_EVENT_THRESHOLD_DELETE] = thermal_genl_event_threshold_delete, [THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = thermal_genl_event_threshold_flush, [THERMAL_GENL_EVENT_THRESHOLD_DOWN] = thermal_genl_event_threshold_down, [THERMAL_GENL_EVENT_THRESHOLD_UP] = thermal_genl_event_threshold_up, }; /* * Generic netlink event encoding */ static int thermal_genl_send_event(enum thermal_genl_event event, struct param *p) { struct sk_buff *msg; int ret = -EMSGSIZE; void *hdr; if (!thermal_group_has_listeners(THERMAL_GENL_EVENT_GROUP)) return 0; msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) return -ENOMEM; p->msg = msg; hdr = genlmsg_put(msg, 0, 0, &thermal_genl_family, 0, event); if (!hdr) goto out_free_msg; ret = event_cb[event](p); if (ret) goto out_cancel_msg; genlmsg_end(msg, hdr); genlmsg_multicast(&thermal_genl_family, msg, 0, THERMAL_GENL_EVENT_GROUP, GFP_KERNEL); return 0; out_cancel_msg: genlmsg_cancel(msg, hdr); out_free_msg: nlmsg_free(msg); return ret; } int thermal_notify_tz_create(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id, .name = tz->type }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_CREATE, &p); } int thermal_notify_tz_delete(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_DELETE, &p); } int thermal_notify_tz_enable(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_ENABLE, &p); } int thermal_notify_tz_disable(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_DISABLE, &p); } int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, const struct thermal_trip *trip) { struct param p = { .tz_id = tz->id, .trip_id = thermal_zone_trip_id(tz, trip), .temp = tz->temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_DOWN, &p); } int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, const struct thermal_trip *trip) { struct param p = { .tz_id = tz->id, .trip_id = thermal_zone_trip_id(tz, trip), .temp = tz->temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_UP, &p); } int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, const struct thermal_trip *trip) { struct param p = { .tz_id = tz->id, .trip_id = thermal_zone_trip_id(tz, trip), .trip_type = trip->type, .trip_temp = trip->temperature, .trip_hyst = trip->hysteresis }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_CHANGE, &p); } int thermal_notify_cdev_state_update(const struct thermal_cooling_device *cdev, int state) { struct param p = { .cdev_id = cdev->id, .cdev_state = state }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, &p); } int thermal_notify_cdev_add(const struct thermal_cooling_device *cdev) { struct param p = { .cdev_id = cdev->id, .name = cdev->type, .cdev_max_state = cdev->max_state }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_ADD, &p); } int thermal_notify_cdev_delete(const struct thermal_cooling_device *cdev) { struct param p = { .cdev_id = cdev->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_DELETE, &p); } int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, const char *name) { struct param p = { .tz_id = tz->id, .name = name }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p); } int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps) { struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p); } EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event); int thermal_notify_threshold_add(const struct thermal_zone_device *tz, int temperature, int direction) { struct param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_ADD, &p); } int thermal_notify_threshold_delete(const struct thermal_zone_device *tz, int temperature, int direction) { struct param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_DELETE, &p); } int thermal_notify_threshold_flush(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_FLUSH, &p); } int thermal_notify_threshold_down(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id, .temp = tz->temperature, .prev_temp = tz->last_temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_DOWN, &p); } int thermal_notify_threshold_up(const struct thermal_zone_device *tz) { struct param p = { .tz_id = tz->id, .temp = tz->temperature, .prev_temp = tz->last_temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_THRESHOLD_UP, &p); } /*************************** Command encoding ********************************/ static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz, void *data) { struct sk_buff *msg = data; if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, tz->id) || nla_put_string(msg, THERMAL_GENL_ATTR_TZ_NAME, tz->type)) return -EMSGSIZE; return 0; } static int thermal_genl_cmd_tz_get_id(struct param *p) { struct sk_buff *msg = p->msg; struct nlattr *start_tz; int ret; start_tz = nla_nest_start(msg, THERMAL_GENL_ATTR_TZ); if (!start_tz) return -EMSGSIZE; ret = for_each_thermal_zone(__thermal_genl_cmd_tz_get_id, msg); if (ret) goto out_cancel_nest; nla_nest_end(msg, start_tz); return 0; out_cancel_nest: nla_nest_cancel(msg, start_tz); return ret; } static int thermal_genl_cmd_tz_get_trip(struct param *p) { struct sk_buff *msg = p->msg; const struct thermal_trip_desc *td; struct nlattr *start_trip; int id; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; start_trip = nla_nest_start(msg, THERMAL_GENL_ATTR_TZ_TRIP); if (!start_trip) return -EMSGSIZE; guard(thermal_zone)(tz); for_each_trip_desc(tz, td) { const struct thermal_trip *trip = &td->trip; if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, thermal_zone_trip_id(tz, trip)) || nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, trip->type) || nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TEMP, trip->temperature) || nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_HYST, trip->hysteresis)) return -EMSGSIZE; } nla_nest_end(msg, start_trip); return 0; } static int thermal_genl_cmd_tz_get_temp(struct param *p) { struct sk_buff *msg = p->msg; int temp, ret, id; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; ret = thermal_zone_get_temp(tz, &temp); if (ret) return ret; if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id) || nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TEMP, temp)) return -EMSGSIZE; return 0; } static int thermal_genl_cmd_tz_get_gov(struct param *p) { struct sk_buff *msg = p->msg; int id; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; guard(thermal_zone)(tz); if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id) || nla_put_string(msg, THERMAL_GENL_ATTR_TZ_GOV_NAME, tz->governor->name)) return -EMSGSIZE; return 0; } static int __thermal_genl_cmd_cdev_get(struct thermal_cooling_device *cdev, void *data) { struct sk_buff *msg = data; if (nla_put_u32(msg, THERMAL_GENL_ATTR_CDEV_ID, cdev->id)) return -EMSGSIZE; if (nla_put_string(msg, THERMAL_GENL_ATTR_CDEV_NAME, cdev->type)) return -EMSGSIZE; return 0; } static int thermal_genl_cmd_cdev_get(struct param *p) { struct sk_buff *msg = p->msg; struct nlattr *start_cdev; int ret; start_cdev = nla_nest_start(msg, THERMAL_GENL_ATTR_CDEV); if (!start_cdev) return -EMSGSIZE; ret = for_each_thermal_cooling_device(__thermal_genl_cmd_cdev_get, msg); if (ret) goto out_cancel_nest; nla_nest_end(msg, start_cdev); return 0; out_cancel_nest: nla_nest_cancel(msg, start_cdev); return ret; } static int __thermal_genl_cmd_threshold_get(struct user_threshold *threshold, void *arg) { struct sk_buff *msg = arg; if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, threshold->temperature) || nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, threshold->direction)) return -1; return 0; } static int thermal_genl_cmd_threshold_get(struct param *p) { struct sk_buff *msg = p->msg; struct nlattr *start_trip; int id, ret; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; start_trip = nla_nest_start(msg, THERMAL_GENL_ATTR_THRESHOLD); if (!start_trip) return -EMSGSIZE; ret = thermal_thresholds_for_each(tz, __thermal_genl_cmd_threshold_get, msg); if (ret) return -EMSGSIZE; nla_nest_end(msg, start_trip); return 0; } static int thermal_genl_cmd_threshold_add(struct param *p) { int id, temp, direction; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID] || !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP] || !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); temp = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]); direction = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; guard(thermal_zone)(tz); return thermal_thresholds_add(tz, temp, direction); } static int thermal_genl_cmd_threshold_delete(struct param *p) { int id, temp, direction; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID] || !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP] || !p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); temp = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]); direction = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; guard(thermal_zone)(tz); return thermal_thresholds_delete(tz, temp, direction); } static int thermal_genl_cmd_threshold_flush(struct param *p) { int id; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) return -EINVAL; id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; guard(thermal_zone)(tz); thermal_thresholds_flush(tz); return 0; } static cb_t cmd_cb[] = { [THERMAL_GENL_CMD_TZ_GET_ID] = thermal_genl_cmd_tz_get_id, [THERMAL_GENL_CMD_TZ_GET_TRIP] = thermal_genl_cmd_tz_get_trip, [THERMAL_GENL_CMD_TZ_GET_TEMP] = thermal_genl_cmd_tz_get_temp, [THERMAL_GENL_CMD_TZ_GET_GOV] = thermal_genl_cmd_tz_get_gov, [THERMAL_GENL_CMD_CDEV_GET] = thermal_genl_cmd_cdev_get, [THERMAL_GENL_CMD_THRESHOLD_GET] = thermal_genl_cmd_threshold_get, [THERMAL_GENL_CMD_THRESHOLD_ADD] = thermal_genl_cmd_threshold_add, [THERMAL_GENL_CMD_THRESHOLD_DELETE] = thermal_genl_cmd_threshold_delete, [THERMAL_GENL_CMD_THRESHOLD_FLUSH] = thermal_genl_cmd_threshold_flush, }; static int thermal_genl_cmd_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct param p = { .msg = skb }; const struct genl_dumpit_info *info = genl_dumpit_info(cb); int cmd = info->op.cmd; int ret; void *hdr; hdr = genlmsg_put(skb, 0, 0, &thermal_genl_family, 0, cmd); if (!hdr) return -EMSGSIZE; ret = cmd_cb[cmd](&p); if (ret) goto out_cancel_msg; genlmsg_end(skb, hdr); return 0; out_cancel_msg: genlmsg_cancel(skb, hdr); return ret; } static int thermal_genl_cmd_doit(struct sk_buff *skb, struct genl_info *info) { struct param p = { .attrs = info->attrs }; struct sk_buff *msg; void *hdr; int cmd = info->genlhdr->cmd; int ret = -EMSGSIZE; msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) return -ENOMEM; p.msg = msg; hdr = genlmsg_put_reply(msg, info, &thermal_genl_family, 0, cmd); if (!hdr) goto out_free_msg; ret = cmd_cb[cmd](&p); if (ret) goto out_cancel_msg; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); out_cancel_msg: genlmsg_cancel(msg, hdr); out_free_msg: nlmsg_free(msg); return ret; } static int thermal_genl_bind(int mcgrp) { struct thermal_genl_notify n = { .mcgrp = mcgrp }; if (WARN_ON_ONCE(mcgrp > THERMAL_GENL_MAX_GROUP)) return -EINVAL; blocking_notifier_call_chain(&thermal_genl_chain, THERMAL_NOTIFY_BIND, &n); return 0; } static void thermal_genl_unbind(int mcgrp) { struct thermal_genl_notify n = { .mcgrp = mcgrp }; if (WARN_ON_ONCE(mcgrp > THERMAL_GENL_MAX_GROUP)) return; blocking_notifier_call_chain(&thermal_genl_chain, THERMAL_NOTIFY_UNBIND, &n); } static const struct genl_small_ops thermal_genl_ops[] = { { .cmd = THERMAL_GENL_CMD_TZ_GET_ID, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = thermal_genl_cmd_dumpit, }, { .cmd = THERMAL_GENL_CMD_TZ_GET_TRIP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_TZ_GET_TEMP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_TZ_GET_GOV, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_CDEV_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = thermal_genl_cmd_dumpit, }, { .cmd = THERMAL_GENL_CMD_THRESHOLD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_THRESHOLD_ADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_THRESHOLD_DELETE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, { .cmd = THERMAL_GENL_CMD_THRESHOLD_FLUSH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = thermal_genl_cmd_doit, }, }; static struct genl_family thermal_genl_family __ro_after_init = { .hdrsize = 0, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, .policy = thermal_genl_policy, .bind = thermal_genl_bind, .unbind = thermal_genl_unbind, .small_ops = thermal_genl_ops, .n_small_ops = ARRAY_SIZE(thermal_genl_ops), .resv_start_op = __THERMAL_GENL_CMD_MAX, .mcgrps = thermal_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(thermal_genl_mcgrps), }; int thermal_genl_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&thermal_genl_chain, nb); } int thermal_genl_unregister_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&thermal_genl_chain, nb); } int __init thermal_netlink_init(void) { return genl_register_family(&thermal_genl_family); } void __init thermal_netlink_exit(void) { genl_unregister_family(&thermal_genl_family); }
2 16 1093 424 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETLINK_H #define __LINUX_NETLINK_H #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/export.h> #include <net/scm.h> #include <uapi/linux/netlink.h> struct net; void do_trace_netlink_extack(const char *msg); static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; } enum netlink_skb_flags { NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ __u32 portid; __u32 dst_group; __u32 flags; struct sock *sk; bool nsid_is_set; int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) #define NETLINK_CTX_SIZE 48 void netlink_table_grab(void); void netlink_table_ungrab(void); #define NL_CFG_F_NONROOT_RECV (1 << 0) #define NL_CFG_F_NONROOT_SEND (1 << 1) /* optional Netlink kernel configuration parameters */ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release) (struct sock *sk, unsigned long *groups); }; struct sock *__netlink_kernel_create(struct net *net, int unit, struct module *module, struct netlink_kernel_cfg *cfg); static inline struct sock * netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) { return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } /* this can be increased when necessary - don't expose to userland */ #define NETLINK_MAX_COOKIE_LEN 20 #define NETLINK_MAX_FMTMSG_LEN 80 /** * struct netlink_ext_ack - netlink extended ACK report struct * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error * @policy: policy for a bad attribute * @miss_type: attribute type which was missing * @miss_nest: nest missing an attribute (%NULL if missing top level attr) * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length * @_msg_buf: output buffer for formatted message strings - don't access * directly, use %NL_SET_ERR_MSG_FMT */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; const struct nla_policy *policy; const struct nlattr *miss_nest; u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; char _msg_buf[NETLINK_MAX_FMTMSG_LEN]; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. * If string formatting is needed use NL_SET_ERR_MSG_FMT. */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ do_trace_netlink_extack(__msg); \ \ if (__extack) \ __extack->_msg = __msg; \ } while (0) /* We splice fmt with %s at each end even in the snprintf so that both calls * can use the same string constant, avoiding its duplication in .ro */ #define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (!__extack) \ break; \ if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ "%s" fmt "%s", "", ##args, "") >= \ NETLINK_MAX_FMTMSG_LEN) \ net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ ##args, "\n"); \ \ do_trace_netlink_extack(__extack->_msg_buf); \ \ __extack->_msg = __extack->_msg_buf; \ } while (0) #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) #define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) #define NL_SET_ERR_MSG_WEAK(extack, msg) do { \ if ((extack) && !(extack)->_msg) \ NL_SET_ERR_MSG((extack), msg); \ } while (0) #define NL_SET_ERR_MSG_WEAK_MOD(extack, msg) do { \ if ((extack) && !(extack)->_msg) \ NL_SET_ERR_MSG_MOD((extack), msg); \ } while (0) #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ (extack)->policy = (pol); \ } \ } while (0) #define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL) #define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ do_trace_netlink_extack(__msg); \ \ if (__extack) { \ __extack->_msg = __msg; \ __extack->bad_attr = (attr); \ __extack->policy = (pol); \ } \ } while (0) #define NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, pol, fmt, args...) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (!__extack) \ break; \ \ if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ "%s" fmt "%s", "", ##args, "") >= \ NETLINK_MAX_FMTMSG_LEN) \ net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ ##args, "\n"); \ \ do_trace_netlink_extack(__extack->_msg_buf); \ \ __extack->_msg = __extack->_msg_buf; \ __extack->bad_attr = (attr); \ __extack->policy = (pol); \ } while (0) #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) #define NL_SET_ERR_MSG_ATTR_FMT(extack, attr, msg, args...) \ NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, NULL, msg, ##args) #define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) { \ __extack->miss_nest = (nest); \ __extack->miss_type = (type); \ } \ } while (0) #define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \ struct nlattr **__tb = (tb); \ u32 __attr = (type); \ int __retval; \ \ __retval = !__tb[__attr]; \ if (__retval) \ NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \ __retval; \ }) static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { if (!extack) return; memcpy(extack->cookie, &cookie, sizeof(cookie)); extack->cookie_len = sizeof(cookie); } void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data); int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation, netlink_filter_fn filter, void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfd(int fd); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb); static inline struct sk_buff * netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *nskb; nskb = skb_clone(skb, gfp_mask); if (!nskb) return NULL; /* This is a large skb, set destructor callback to release head */ if (is_vmalloc_addr(skb->head)) nskb->destructor = skb->destructor; return nskb; } /* * skb should fit one page. This choice is good for headerless malloc. * But we should limit to 8K so that userspace does not have to * use enormous buffer sizes on recvmsg() calls just to avoid * MSG_TRUNC when PAGE_SIZE is very large. */ #if PAGE_SIZE < 8192UL #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE) #else #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL) #endif #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); void *data; /* the module that dump function belong to */ struct module *module; struct netlink_ext_ack *extack; u16 family; u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; int flags; bool strict_check; union { u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. */ long args[6]; }; }; #define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) struct netlink_notify { struct net *net; u32 portid; int protocol; }; struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); struct netlink_ext_ack *extack; void *data; struct module *module; u32 min_dump_alloc; int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control); static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control) { if (!control->module) control->module = THIS_MODULE; return __netlink_dump_start(ssk, skb, nlh, control); } struct netlink_tap { struct net_device *dev; struct module *module; struct list_head list; }; int netlink_add_tap(struct netlink_tap *nt); int netlink_remove_tap(struct netlink_tap *nt); bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, struct user_namespace *ns, int cap); bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap); struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */
8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 // SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC security handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/crypto.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <keys/rxrpc-type.h> #include "ar-internal.h" static const struct rxrpc_security *rxrpc_security_types[] = { [RXRPC_SECURITY_NONE] = &rxrpc_no_security, #ifdef CONFIG_RXKAD [RXRPC_SECURITY_RXKAD] = &rxkad, #endif }; int __init rxrpc_init_security(void) { int i, ret; for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) { if (rxrpc_security_types[i]) { ret = rxrpc_security_types[i]->init(); if (ret < 0) goto failed; } } return 0; failed: for (i--; i >= 0; i--) if (rxrpc_security_types[i]) rxrpc_security_types[i]->exit(); return ret; } void rxrpc_exit_security(void) { int i; for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) if (rxrpc_security_types[i]) rxrpc_security_types[i]->exit(); } /* * look up an rxrpc security module */ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { if (security_index >= ARRAY_SIZE(rxrpc_security_types)) return NULL; return rxrpc_security_types[security_index]; } /* * Initialise the security on a client call. */ int rxrpc_init_client_call_security(struct rxrpc_call *call) { const struct rxrpc_security *sec = &rxrpc_no_security; struct rxrpc_key_token *token; struct key *key = call->key; int ret; if (!key) goto found; ret = key_validate(key); if (ret < 0) return ret; for (token = key->payload.data[0]; token; token = token->next) { sec = rxrpc_security_lookup(token->security_index); if (sec) goto found; } return -EKEYREJECTED; found: call->security = sec; call->security_ix = sec->security_index; return 0; } /* * initialise the security on a client connection */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { struct rxrpc_key_token *token; struct key *key = conn->key; int ret = 0; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); for (token = key->payload.data[0]; token; token = token->next) { if (token->security_index == conn->security->security_index) goto found; } return -EKEYREJECTED; found: mutex_lock(&conn->security_lock); if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) { ret = conn->security->init_connection_security(conn, token); if (ret == 0) { spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) conn->state = RXRPC_CONN_CLIENT; spin_unlock(&conn->state_lock); } } mutex_unlock(&conn->security_lock); return ret; } /* * Set the ops a server connection. */ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, struct sk_buff *skb) { const struct rxrpc_security *sec; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter(""); sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security, RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { rxrpc_direct_abort(skb, rxrpc_abort_no_service_key, sec->no_key_abort, -EKEYREJECTED); return NULL; } return sec; } /* * Find the security key for a server connection. */ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, struct sk_buff *skb, u32 kvno, u32 enctype) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; struct key *key = ERR_PTR(-EKEYREJECTED); key_ref_t kref = NULL; char kdesc[5 + 1 + 3 + 1 + 12 + 1 + 12 + 1]; int ret; _enter(""); if (enctype) sprintf(kdesc, "%u:%u:%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex, kvno, enctype); else if (kvno) sprintf(kdesc, "%u:%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex, kvno); else sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); read_lock(&conn->local->services_lock); rx = conn->local->service; if (!rx) goto out; /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { key = ERR_CAST(kref); goto out; } key = key_ref_to_ptr(kref); ret = key_validate(key); if (ret < 0) { key_put(key); key = ERR_PTR(ret); goto out; } out: read_unlock(&conn->local->services_lock); return key; }
44 29 28 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 /* * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/random.h> #include <linux/export.h> #include "rds.h" /* * All of connection management is simplified by serializing it through * work queues that execute in a connection managing thread. * * TCP wants to send acks through sendpage() in response to data_ready(), * but it needs a process context to do so. * * The receive paths need to allocate but can't drop packets (!) so we have * a thread around to block allocating if the receive fast path sees an * allocation failure. */ /* Grand Unified Theory of connection life cycle: * At any point in time, the connection can be in one of these states: * DOWN, CONNECTING, UP, DISCONNECTING, ERROR * * The following transitions are possible: * ANY -> ERROR * UP -> DISCONNECTING * ERROR -> DISCONNECTING * DISCONNECTING -> DOWN * DOWN -> CONNECTING * CONNECTING -> UP * * Transition to state DISCONNECTING/DOWN: * - Inside the shutdown worker; synchronizes with xmit path * through RDS_IN_XMIT, and with connection management callbacks * via c_cm_lock. * * For receive callbacks, we rely on the underlying transport * (TCP, IB/RDMA) to provide the necessary synchronisation. */ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); void rds_connect_path_complete(struct rds_conn_path *cp, int curr) { if (!rds_conn_path_transition(cp, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, atomic_read(&cp->cp_state)); rds_conn_path_drop(cp, false); return; } rdsdebug("conn %p for %pI6c to %pI6c complete\n", cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr); cp->cp_reconnect_jiffies = 0; set_bit(0, &cp->cp_conn->c_map_queued); rcu_read_lock(); if (!rds_destroy_pending(cp->cp_conn)) { queue_delayed_work(rds_wq, &cp->cp_send_w, 0); queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); } rcu_read_unlock(); cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; } EXPORT_SYMBOL_GPL(rds_connect_path_complete); void rds_connect_complete(struct rds_connection *conn) { rds_connect_path_complete(&conn->c_path[0], RDS_CONN_CONNECTING); } EXPORT_SYMBOL_GPL(rds_connect_complete); /* * This random exponential backoff is relied on to eventually resolve racing * connects. * * If connect attempts race then both parties drop both connections and come * here to wait for a random amount of time before trying again. Eventually * the backoff range will be so much greater than the time it takes to * establish a connection that one of the pair will establish the connection * before the other's random delay fires. * * Connection attempts that arrive while a connection is already established * are also considered to be racing connects. This lets a connection from * a rebooted machine replace an existing stale connection before the transport * notices that the connection has failed. * * We should *always* start with a random backoff; otherwise a broken connection * will always take several iterations to be re-established. */ void rds_queue_reconnect(struct rds_conn_path *cp) { unsigned long rand; struct rds_connection *conn = cp->cp_conn; rdsdebug("conn %p for %pI6c to %pI6c reconnect jiffies %lu\n", conn, &conn->c_laddr, &conn->c_faddr, cp->cp_reconnect_jiffies); /* let peer with smaller addr initiate reconnect, to avoid duels */ if (conn->c_trans->t_type == RDS_TRANS_TCP && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0) return; set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; rcu_read_lock(); if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); rcu_read_unlock(); return; } get_random_bytes(&rand, sizeof(rand)); rdsdebug("%lu delay %lu ceil conn %p for %pI6c -> %pI6c\n", rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr); rcu_read_lock(); if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_conn_w, rand % cp->cp_reconnect_jiffies); rcu_read_unlock(); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); } void rds_connect_worker(struct work_struct *work) { struct rds_conn_path *cp = container_of(work, struct rds_conn_path, cp_conn_w.work); struct rds_connection *conn = cp->cp_conn; int ret; if (cp->cp_index > 0 && rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); if (ret) { ret = conn->c_trans->conn_path_connect(cp); rdsdebug("conn %p for %pI6c to %pI6c dispatched, ret %d\n", conn, &conn->c_laddr, &conn->c_faddr, ret); if (ret) { if (rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) rds_queue_reconnect(cp); else rds_conn_path_error(cp, "connect failed\n"); } } } void rds_send_worker(struct work_struct *work) { struct rds_conn_path *cp = container_of(work, struct rds_conn_path, cp_send_w.work); int ret; if (rds_conn_path_state(cp) == RDS_CONN_UP) { clear_bit(RDS_LL_SEND_FULL, &cp->cp_flags); ret = rds_send_xmit(cp); cond_resched(); rdsdebug("conn %p ret %d\n", cp->cp_conn, ret); switch (ret) { case -EAGAIN: rds_stats_inc(s_send_immediate_retry); queue_delayed_work(rds_wq, &cp->cp_send_w, 0); break; case -ENOMEM: rds_stats_inc(s_send_delayed_retry); queue_delayed_work(rds_wq, &cp->cp_send_w, 2); break; default: break; } } } void rds_recv_worker(struct work_struct *work) { struct rds_conn_path *cp = container_of(work, struct rds_conn_path, cp_recv_w.work); int ret; if (rds_conn_path_state(cp) == RDS_CONN_UP) { ret = cp->cp_conn->c_trans->recv_path(cp); rdsdebug("conn %p ret %d\n", cp->cp_conn, ret); switch (ret) { case -EAGAIN: rds_stats_inc(s_recv_immediate_retry); queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); break; case -ENOMEM: rds_stats_inc(s_recv_delayed_retry); queue_delayed_work(rds_wq, &cp->cp_recv_w, 2); break; default: break; } } } void rds_shutdown_worker(struct work_struct *work) { struct rds_conn_path *cp = container_of(work, struct rds_conn_path, cp_down_w); rds_conn_shutdown(cp); } void rds_threads_exit(void) { destroy_workqueue(rds_wq); } int rds_threads_init(void) { rds_wq = create_singlethread_workqueue("krdsd"); if (!rds_wq) return -ENOMEM; return 0; } /* Compare two IPv6 addresses. Return 0 if the two addresses are equal. * Return 1 if the first is greater. Return -1 if the second is greater. */ int rds_addr_cmp(const struct in6_addr *addr1, const struct in6_addr *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const __be64 *a1, *a2; u64 x, y; a1 = (__be64 *)addr1; a2 = (__be64 *)addr2; if (*a1 != *a2) { if (be64_to_cpu(*a1) < be64_to_cpu(*a2)) return -1; else return 1; } else { x = be64_to_cpu(*++a1); y = be64_to_cpu(*++a2); if (x < y) return -1; else if (x > y) return 1; else return 0; } #else u32 a, b; int i; for (i = 0; i < 4; i++) { if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) { a = ntohl(addr1->s6_addr32[i]); b = ntohl(addr2->s6_addr32[i]); if (a < b) return -1; else if (a > b) return 1; } } return 0; #endif } EXPORT_SYMBOL_GPL(rds_addr_cmp);
761 761 761 489 757 759 758 760 759 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 // SPDX-License-Identifier: GPL-2.0 /* * jump label x86 support * * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> * */ #include <linux/jump_label.h> #include <linux/memory.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/list.h> #include <linux/jhash.h> #include <linux/cpu.h> #include <asm/kprobes.h> #include <asm/alternative.h> #include <asm/text-patching.h> #include <asm/insn.h> int arch_jump_entry_size(struct jump_entry *entry) { struct insn insn = {}; insn_decode_kernel(&insn, (void *)jump_entry_code(entry)); BUG_ON(insn.length != 2 && insn.length != 5); return insn.length; } struct jump_label_patch { const void *code; int size; }; static struct jump_label_patch __jump_label_patch(struct jump_entry *entry, enum jump_label_type type) { const void *expect, *code, *nop; const void *addr, *dest; int size; addr = (void *)jump_entry_code(entry); dest = (void *)jump_entry_target(entry); size = arch_jump_entry_size(entry); switch (size) { case JMP8_INSN_SIZE: code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest); nop = x86_nops[size]; break; case JMP32_INSN_SIZE: code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); nop = x86_nops[size]; break; default: BUG(); } if (type == JUMP_LABEL_JMP) expect = nop; else expect = code; if (memcmp(addr, expect, size)) { /* * The location is not an op that we were expecting. * Something went wrong. Crash the box, as something could be * corrupting the kernel. */ pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n", addr, addr, addr, expect, size, type); BUG(); } if (type == JUMP_LABEL_NOP) code = nop; return (struct jump_label_patch){.code = code, .size = size}; } static __always_inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { const struct jump_label_patch jlp = __jump_label_patch(entry, type); /* * As long as only a single processor is running and the code is still * not marked as RO, text_poke_early() can be used; Checking that * system_state is SYSTEM_BOOTING guarantees it. It will be set to * SYSTEM_SCHEDULING before other cores are awaken and before the * code is write-protected. * * At the time the change is being done, just ignore whether we * are doing nop -> jump or jump -> nop transition, and assume * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size); return; } text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); } static void __ref jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { mutex_lock(&text_mutex); __jump_label_transform(entry, type, init); mutex_unlock(&text_mutex); } void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { jump_label_transform(entry, type, 0); } bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { struct jump_label_patch jlp; if (system_state == SYSTEM_BOOTING) { /* * Fallback to the non-batching mode. */ arch_jump_label_transform(entry, type); return true; } mutex_lock(&text_mutex); jlp = __jump_label_patch(entry, type); text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); mutex_unlock(&text_mutex); return true; } void arch_jump_label_transform_apply(void) { mutex_lock(&text_mutex); text_poke_finish(); mutex_unlock(&text_mutex); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_WRAPPER_H #define __NET_TC_WRAPPER_H #include <net/pkt_cls.h> #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) #include <linux/cpufeature.h> #include <linux/static_key.h> #include <linux/indirect_call_wrapper.h> #define TC_INDIRECT_SCOPE extern struct static_key_false tc_skip_wrapper; /* TC Actions */ #ifdef CONFIG_NET_CLS_ACT #define TC_INDIRECT_ACTION_DECLARE(fname) \ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ const struct tc_action *a, \ struct tcf_result *res)) TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act); TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act); TC_INDIRECT_ACTION_DECLARE(tcf_csum_act); TC_INDIRECT_ACTION_DECLARE(tcf_ct_act); TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act); TC_INDIRECT_ACTION_DECLARE(tcf_gact_act); TC_INDIRECT_ACTION_DECLARE(tcf_gate_act); TC_INDIRECT_ACTION_DECLARE(tcf_ife_act); TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act); TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act); TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act); TC_INDIRECT_ACTION_DECLARE(tcf_nat_act); TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act); TC_INDIRECT_ACTION_DECLARE(tcf_police_act); TC_INDIRECT_ACTION_DECLARE(tcf_sample_act); TC_INDIRECT_ACTION_DECLARE(tcf_simp_act); TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act); TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act); TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act); TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { if (static_branch_likely(&tc_skip_wrapper)) goto skip; #if IS_BUILTIN(CONFIG_NET_ACT_GACT) if (a->ops->act == tcf_gact_act) return tcf_gact_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_MIRRED) if (a->ops->act == tcf_mirred_act) return tcf_mirred_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_PEDIT) if (a->ops->act == tcf_pedit_act) return tcf_pedit_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) if (a->ops->act == tcf_skbedit_act) return tcf_skbedit_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) if (a->ops->act == tcf_skbmod_act) return tcf_skbmod_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_POLICE) if (a->ops->act == tcf_police_act) return tcf_police_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_BPF) if (a->ops->act == tcf_bpf_act) return tcf_bpf_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) if (a->ops->act == tcf_connmark_act) return tcf_connmark_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CSUM) if (a->ops->act == tcf_csum_act) return tcf_csum_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CT) if (a->ops->act == tcf_ct_act) return tcf_ct_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CTINFO) if (a->ops->act == tcf_ctinfo_act) return tcf_ctinfo_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_GATE) if (a->ops->act == tcf_gate_act) return tcf_gate_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_MPLS) if (a->ops->act == tcf_mpls_act) return tcf_mpls_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_NAT) if (a->ops->act == tcf_nat_act) return tcf_nat_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) if (a->ops->act == tunnel_key_act) return tunnel_key_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_VLAN) if (a->ops->act == tcf_vlan_act) return tcf_vlan_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_IFE) if (a->ops->act == tcf_ife_act) return tcf_ife_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SIMP) if (a->ops->act == tcf_simp_act) return tcf_simp_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE) if (a->ops->act == tcf_sample_act) return tcf_sample_act(skb, a, res); #endif skip: return a->ops->act(skb, a, res); } #endif /* CONFIG_NET_CLS_ACT */ /* TC Filters */ #ifdef CONFIG_NET_CLS #define TC_INDIRECT_FILTER_DECLARE(fname) \ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ const struct tcf_proto *tp, \ struct tcf_result *res)) TC_INDIRECT_FILTER_DECLARE(basic_classify); TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify); TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify); TC_INDIRECT_FILTER_DECLARE(fl_classify); TC_INDIRECT_FILTER_DECLARE(flow_classify); TC_INDIRECT_FILTER_DECLARE(fw_classify); TC_INDIRECT_FILTER_DECLARE(mall_classify); TC_INDIRECT_FILTER_DECLARE(route4_classify); TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { if (static_branch_likely(&tc_skip_wrapper)) goto skip; #if IS_BUILTIN(CONFIG_NET_CLS_BPF) if (tp->classify == cls_bpf_classify) return cls_bpf_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_U32) if (tp->classify == u32_classify) return u32_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FLOWER) if (tp->classify == fl_classify) return fl_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FW) if (tp->classify == fw_classify) return fw_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) if (tp->classify == mall_classify) return mall_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_BASIC) if (tp->classify == basic_classify) return basic_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) if (tp->classify == cls_cgroup_classify) return cls_cgroup_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FLOW) if (tp->classify == flow_classify) return flow_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4) if (tp->classify == route4_classify) return route4_classify(skb, tp, res); #endif skip: return tp->classify(skb, tp, res); } #endif /* CONFIG_NET_CLS */ static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) static_branch_enable(&tc_skip_wrapper); #endif } #else #define TC_INDIRECT_SCOPE static static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { return a->ops->act(skb, a, res); } static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { return tp->classify(skb, tp, res); } static inline void tc_wrapper_init(void) { } #endif #endif /* __NET_TC_WRAPPER_H */
51 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 /* * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, 2013, Ericsson AB * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "core.h" #include "name_table.h" #include "subscr.h" #include "bearer.h" #include "net.h" #include "socket.h" #include "bcast.h" #include "node.h" #include "crypto.h" #include <linux/module.h> /* configurable TIPC parameters */ unsigned int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ static int __net_init tipc_init_net(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); int err; tn->net_id = 4711; tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; INIT_WORK(&tn->work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; get_random_bytes(&tn->random, sizeof(int)); INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); #ifdef CONFIG_TIPC_CRYPTO err = tipc_crypto_start(&tn->crypto_tx, net, NULL); if (err) goto out_crypto; #endif err = tipc_sk_rht_init(net); if (err) goto out_sk_rht; err = tipc_nametbl_init(net); if (err) goto out_nametbl; err = tipc_bcast_init(net); if (err) goto out_bclink; err = tipc_attach_loopback(net); if (err) goto out_bclink; return 0; out_bclink: tipc_nametbl_stop(net); out_nametbl: tipc_sk_rht_destroy(net); out_sk_rht: #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_stop(&tn->crypto_tx); out_crypto: #endif return err; } static void __net_exit tipc_exit_net(struct net *net) { struct tipc_net *tn = tipc_net(net); tipc_detach_loopback(net); tipc_net_stop(net); /* Make sure the tipc_net_finalize_work() finished */ cancel_work_sync(&tn->work); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_stop(&tipc_net(net)->crypto_tx); #endif while (atomic_read(&tn->wq_count)) cond_resched(); } static void __net_exit tipc_pernet_pre_exit(struct net *net) { tipc_node_pre_cleanup_net(net); } static struct pernet_operations tipc_pernet_pre_exit_ops = { .pre_exit = tipc_pernet_pre_exit, }; static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, .id = &tipc_net_id, .size = sizeof(struct tipc_net), }; static struct pernet_operations tipc_topsrv_net_ops = { .init = tipc_topsrv_init_net, .exit = tipc_topsrv_exit_net, }; static int __init tipc_init(void) { int err; pr_info("Activated (version " TIPC_MOD_VER ")\n"); sysctl_tipc_rmem[0] = RCVBUF_MIN; sysctl_tipc_rmem[1] = RCVBUF_DEF; sysctl_tipc_rmem[2] = RCVBUF_MAX; err = tipc_register_sysctl(); if (err) goto out_sysctl; err = register_pernet_device(&tipc_net_ops); if (err) goto out_pernet; err = tipc_socket_init(); if (err) goto out_socket; err = register_pernet_device(&tipc_topsrv_net_ops); if (err) goto out_pernet_topsrv; err = register_pernet_subsys(&tipc_pernet_pre_exit_ops); if (err) goto out_register_pernet_subsys; err = tipc_bearer_setup(); if (err) goto out_bearer; err = tipc_netlink_start(); if (err) goto out_netlink; err = tipc_netlink_compat_start(); if (err) goto out_netlink_compat; pr_info("Started in single node mode\n"); return 0; out_netlink_compat: tipc_netlink_stop(); out_netlink: tipc_bearer_cleanup(); out_bearer: unregister_pernet_subsys(&tipc_pernet_pre_exit_ops); out_register_pernet_subsys: unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop(); out_socket: unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { tipc_netlink_compat_stop(); tipc_netlink_stop(); tipc_bearer_cleanup(); unregister_pernet_subsys(&tipc_pernet_pre_exit_ops); unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); unregister_pernet_device(&tipc_net_ops); tipc_unregister_sysctl(); pr_info("Deactivated\n"); } module_init(tipc_init); module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(TIPC_MOD_VER);
16569 16572 348 348 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 // SPDX-License-Identifier: GPL-2.0-only /* Common code for 32 and 64-bit NUMA */ #include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/of.h> #include <linux/string.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/mmzone.h> #include <linux/ctype.h> #include <linux/nodemask.h> #include <linux/sched.h> #include <linux/topology.h> #include <linux/sort.h> #include <linux/numa_memblks.h> #include <asm/e820/api.h> #include <asm/proto.h> #include <asm/dma.h> #include <asm/amd_nb.h> #include "numa_internal.h" int numa_off; static __init int numa_setup(char *opt) { if (!opt) return -EINVAL; if (!strncmp(opt, "off", 3)) numa_off = 1; if (!strncmp(opt, "fake=", 5)) return numa_emu_cmdline(opt + 5); if (!strncmp(opt, "noacpi", 6)) disable_srat(); if (!strncmp(opt, "nohmat", 6)) disable_hmat(); return 0; } early_param("numa", numa_setup); /* * apicid, cpu, node mappings */ s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; int numa_cpu_node(int cpu) { u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) return __apicid_to_node[apicid]; return NUMA_NO_NODE; } cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); /* * Map cpu index to node index */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); void numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); /* early setting, no percpu area yet */ if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; return; } #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); dump_stack(); return; } #endif per_cpu(x86_cpu_to_node_map, cpu) = node; set_cpu_numa_node(cpu, node); } void numa_clear_node(int cpu) { numa_set_node(cpu, NUMA_NO_NODE); } /* * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. * * Note: cpumask_of_node() is not valid until after this is done. * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ void __init setup_node_to_cpumask_map(void) { unsigned int node; /* setup nr_node_ids if not done yet */ if (nr_node_ids == MAX_NUMNODES) setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init numa_register_nodes(void) { int nid; if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. */ for_each_node_mask(nid, node_possible_map) { unsigned long start_pfn, end_pfn; /* * Note, get_pfn_range_for_nid() depends on * memblock_set_node() having already happened */ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); if (start_pfn >= end_pfn) continue; alloc_node_data(nid); node_set_online(nid); } /* Dump memblock with node info and return. */ memblock_dump_all(); return 0; } /* * There are unfortunately some poorly designed mainboards around that * only connect memory to a single CPU. This breaks the 1:1 cpu->node * mapping. To avoid this fill in the mapping for all possible CPUs, * as the number of CPUs is not known yet. We round robin the existing * nodes. */ static void __init numa_init_array(void) { int rr, i; rr = first_node(node_online_map); for (i = 0; i < nr_cpu_ids; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); rr = next_node_in(rr, node_online_map); } } static int __init numa_init(int (*init_func)(void)) { int i; int ret; for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE); ret = numa_memblks_init(init_func, /* memblock_force_top_down */ true); if (ret < 0) return ret; ret = numa_register_nodes(); if (ret < 0) return ret; for (i = 0; i < nr_cpu_ids; i++) { int nid = early_cpu_to_node(i); if (nid == NUMA_NO_NODE) continue; if (!node_online(nid)) numa_clear_node(i); } numa_init_array(); return 0; } /** * dummy_numa_init - Fallback dummy NUMA init * * Used if there's no underlying NUMA architecture, NUMA initialization * fails, or NUMA is disabled on the command line. * * Must online at least one node and add memory blocks that cover all * allowed memory. This function must not fail. */ static int __init dummy_numa_init(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); return 0; } /** * x86_numa_init - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The * last fallback is dummy single node config encompassing whole memory and * never fails. */ void __init x86_numa_init(void) { if (!numa_off) { #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; #endif #ifdef CONFIG_AMD_NUMA if (!numa_init(amd_numa_init)) return; #endif if (acpi_disabled && !numa_init(of_numa_init)) return; } numa_init(dummy_numa_init); } /* * A node may exist which has one or more Generic Initiators but no CPUs and no * memory. * * This function must be called after init_cpu_to_node(), to ensure that any * memoryless CPU nodes have already been brought online, and before the * node_data[nid] is needed for zone list setup in build_all_zonelists(). * * When this function is called, any nodes containing either memory and/or CPUs * will already be online and there is no need to do anything extra, even if * they also contain one or more Generic Initiators. */ void __init init_gi_nodes(void) { int nid; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_one_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ for_each_node_state(nid, N_GENERIC_INITIATOR) if (!node_online(nid)) node_set_online(nid); } /* * Setup early cpu_to_node. * * Populate cpu_to_node[] only if x86_cpu_to_apicid[], * and apicid_to_node[] tables have valid entries for a CPU. * This means we skip cpu_to_node[] initialisation for NUMA * emulation and faking node case (when running a kernel compiled * for NUMA on a non NUMA box), which is OK as cpu_to_node[] * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. * * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { int cpu; u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); for_each_possible_cpu(cpu) { int node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) continue; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_one_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ if (!node_online(node)) node_set_online(node); numa_set_node(cpu, node); } } #ifndef CONFIG_DEBUG_PER_CPU_MAPS # ifndef CONFIG_NUMA_EMU void numa_add_cpu(unsigned int cpu) { cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } void numa_remove_cpu(unsigned int cpu) { cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } # endif /* !CONFIG_NUMA_EMU */ #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ int __cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) { printk(KERN_WARNING "cpu_to_node(%d): usage too early!\n", cpu); dump_stack(); return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; } return per_cpu(x86_cpu_to_node_map, cpu); } EXPORT_SYMBOL(__cpu_to_node); /* * Same function as cpu_to_node() but used if called before the * per_cpu areas are setup. */ int early_cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; if (!cpu_possible(cpu)) { printk(KERN_WARNING "early_cpu_to_node(%d): no per_cpu area!\n", cpu); dump_stack(); return NUMA_NO_NODE; } return per_cpu(x86_cpu_to_node_map, cpu); } void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable) { struct cpumask *mask; if (node == NUMA_NO_NODE) { /* early_cpu_to_node() already emits a warning and trace */ return; } mask = node_to_cpumask_map[node]; if (!cpumask_available(mask)) { pr_err("node_to_cpumask_map[%i] NULL\n", node); dump_stack(); return; } if (enable) cpumask_set_cpu(cpu, mask); else cpumask_clear_cpu(cpu, mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n", enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, cpumask_pr_args(mask)); return; } # ifndef CONFIG_NUMA_EMU static void numa_set_cpumask(int cpu, bool enable) { debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); } void numa_add_cpu(unsigned int cpu) { numa_set_cpumask(cpu, true); } void numa_remove_cpu(unsigned int cpu) { numa_set_cpumask(cpu, false); } # endif /* !CONFIG_NUMA_EMU */ /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ const struct cpumask *cpumask_of_node(int node) { if ((unsigned)node >= nr_node_ids) { printk(KERN_WARNING "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n", node, nr_node_ids); dump_stack(); return cpu_none_mask; } if (!cpumask_available(node_to_cpumask_map[node])) { printk(KERN_WARNING "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return cpu_online_mask; } return node_to_cpumask_map[node]; } EXPORT_SYMBOL(cpumask_of_node); #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ #ifdef CONFIG_NUMA_EMU void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, unsigned int nr_emu_nids) { int i, j; /* * Transform __apicid_to_node table to use emulated nids by * reverse-mapping phys_nid. The maps should always exist but fall * back to zero just in case. */ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) { if (__apicid_to_node[i] == NUMA_NO_NODE) continue; for (j = 0; j < nr_emu_nids; j++) if (__apicid_to_node[i] == emu_nid_to_phys[j]) break; __apicid_to_node[i] = j < nr_emu_nids ? j : 0; } } u64 __init numa_emu_dma_end(void) { return PFN_PHYS(MAX_DMA32_PFN); } #endif /* CONFIG_NUMA_EMU */
3 3 3 3 3 1 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright 2019 Hans de Goede <hdegoede@redhat.com> */ #include <linux/module.h> #include <linux/pm.h> #include <linux/usb.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_state_helper.h> #include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> #include <drm/drm_format_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> static bool eco_mode; module_param(eco_mode, bool, 0644); MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); #define DRIVER_NAME "gm12u320" #define DRIVER_DESC "Grain Media GM12U320 USB projector display" #define DRIVER_DATE "2019" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 /* * The DLP has an actual width of 854 pixels, but that is not a multiple * of 8, breaking things left and right, so we export a width of 848. */ #define GM12U320_USER_WIDTH 848 #define GM12U320_REAL_WIDTH 854 #define GM12U320_HEIGHT 480 #define GM12U320_BLOCK_COUNT 20 #define GM12U320_ERR(fmt, ...) \ DRM_DEV_ERROR(gm12u320->dev.dev, fmt, ##__VA_ARGS__) #define MISC_RCV_EPT 1 #define DATA_RCV_EPT 2 #define DATA_SND_EPT 3 #define MISC_SND_EPT 4 #define DATA_BLOCK_HEADER_SIZE 84 #define DATA_BLOCK_CONTENT_SIZE 64512 #define DATA_BLOCK_FOOTER_SIZE 20 #define DATA_BLOCK_SIZE (DATA_BLOCK_HEADER_SIZE + \ DATA_BLOCK_CONTENT_SIZE + \ DATA_BLOCK_FOOTER_SIZE) #define DATA_LAST_BLOCK_CONTENT_SIZE 4032 #define DATA_LAST_BLOCK_SIZE (DATA_BLOCK_HEADER_SIZE + \ DATA_LAST_BLOCK_CONTENT_SIZE + \ DATA_BLOCK_FOOTER_SIZE) #define CMD_SIZE 31 #define READ_STATUS_SIZE 13 #define MISC_VALUE_SIZE 4 #define CMD_TIMEOUT 200 #define DATA_TIMEOUT 1000 #define IDLE_TIMEOUT 2000 #define FIRST_FRAME_TIMEOUT 2000 #define MISC_REQ_GET_SET_ECO_A 0xff #define MISC_REQ_GET_SET_ECO_B 0x35 /* Windows driver does once every second, with arg d = 1, other args 0 */ #define MISC_REQ_UNKNOWN1_A 0xff #define MISC_REQ_UNKNOWN1_B 0x38 /* Windows driver does this on init, with arg a, b = 0, c = 0xa0, d = 4 */ #define MISC_REQ_UNKNOWN2_A 0xa5 #define MISC_REQ_UNKNOWN2_B 0x00 struct gm12u320_device { struct drm_device dev; struct device *dmadev; struct drm_simple_display_pipe pipe; struct drm_connector conn; unsigned char *cmd_buf; unsigned char *data_buf[GM12U320_BLOCK_COUNT]; struct { struct delayed_work work; struct mutex lock; struct drm_framebuffer *fb; struct drm_rect rect; int frame; int draw_status_timeout; struct iosys_map src_map; } fb_update; }; #define to_gm12u320(__dev) container_of(__dev, struct gm12u320_device, dev) static const char cmd_data[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x68, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x10, 0xff, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char cmd_draw[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0xfe, 0x00, 0x00, 0x00, 0xc0, 0xd1, 0x05, 0x00, 0x40, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char cmd_misc[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x80, 0x01, 0x10, 0xfd, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char data_block_header[DATA_BLOCK_HEADER_SIZE] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x15, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x01, 0x00, 0x00, 0xdb }; static const char data_last_block_header[DATA_BLOCK_HEADER_SIZE] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x20, 0x00, 0xc0, 0x0f, 0x00, 0x00, 0x01, 0x00, 0x00, 0xd7 }; static const char data_block_footer[DATA_BLOCK_FOOTER_SIZE] = { 0xfb, 0x14, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x4f }; static inline struct usb_device *gm12u320_to_usb_device(struct gm12u320_device *gm12u320) { return interface_to_usbdev(to_usb_interface(gm12u320->dev.dev)); } static int gm12u320_usb_alloc(struct gm12u320_device *gm12u320) { int i, block_size; const char *hdr; gm12u320->cmd_buf = drmm_kmalloc(&gm12u320->dev, CMD_SIZE, GFP_KERNEL); if (!gm12u320->cmd_buf) return -ENOMEM; for (i = 0; i < GM12U320_BLOCK_COUNT; i++) { if (i == GM12U320_BLOCK_COUNT - 1) { block_size = DATA_LAST_BLOCK_SIZE; hdr = data_last_block_header; } else { block_size = DATA_BLOCK_SIZE; hdr = data_block_header; } gm12u320->data_buf[i] = drmm_kzalloc(&gm12u320->dev, block_size, GFP_KERNEL); if (!gm12u320->data_buf[i]) return -ENOMEM; memcpy(gm12u320->data_buf[i], hdr, DATA_BLOCK_HEADER_SIZE); memcpy(gm12u320->data_buf[i] + (block_size - DATA_BLOCK_FOOTER_SIZE), data_block_footer, DATA_BLOCK_FOOTER_SIZE); } return 0; } static int gm12u320_misc_request(struct gm12u320_device *gm12u320, u8 req_a, u8 req_b, u8 arg_a, u8 arg_b, u8 arg_c, u8 arg_d) { struct usb_device *udev = gm12u320_to_usb_device(gm12u320); int ret, len; memcpy(gm12u320->cmd_buf, &cmd_misc, CMD_SIZE); gm12u320->cmd_buf[20] = req_a; gm12u320->cmd_buf[21] = req_b; gm12u320->cmd_buf[22] = arg_a; gm12u320->cmd_buf[23] = arg_b; gm12u320->cmd_buf[24] = arg_c; gm12u320->cmd_buf[25] = arg_d; /* Send request */ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, MISC_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) { GM12U320_ERR("Misc. req. error %d\n", ret); return -EIO; } /* Read value */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, MISC_RCV_EPT), gm12u320->cmd_buf, MISC_VALUE_SIZE, &len, DATA_TIMEOUT); if (ret || len != MISC_VALUE_SIZE) { GM12U320_ERR("Misc. value error %d\n", ret); return -EIO; } /* cmd_buf[0] now contains the read value, which we don't use */ /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, MISC_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, CMD_TIMEOUT); if (ret || len != READ_STATUS_SIZE) { GM12U320_ERR("Misc. status error %d\n", ret); return -EIO; } return 0; } static void gm12u320_32bpp_to_24bpp_packed(u8 *dst, u8 *src, int len) { while (len--) { *dst++ = *src++; *dst++ = *src++; *dst++ = *src++; src++; } } static void gm12u320_copy_fb_to_blocks(struct gm12u320_device *gm12u320) { int block, dst_offset, len, remain, ret, x1, x2, y1, y2; struct drm_framebuffer *fb; void *vaddr; u8 *src; mutex_lock(&gm12u320->fb_update.lock); if (!gm12u320->fb_update.fb) goto unlock; fb = gm12u320->fb_update.fb; x1 = gm12u320->fb_update.rect.x1; x2 = gm12u320->fb_update.rect.x2; y1 = gm12u320->fb_update.rect.y1; y2 = gm12u320->fb_update.rect.y2; vaddr = gm12u320->fb_update.src_map.vaddr; /* TODO: Use mapping abstraction properly */ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) { GM12U320_ERR("drm_gem_fb_begin_cpu_access err: %d\n", ret); goto put_fb; } src = vaddr + y1 * fb->pitches[0] + x1 * 4; x1 += (GM12U320_REAL_WIDTH - GM12U320_USER_WIDTH) / 2; x2 += (GM12U320_REAL_WIDTH - GM12U320_USER_WIDTH) / 2; for (; y1 < y2; y1++) { remain = 0; len = (x2 - x1) * 3; dst_offset = (y1 * GM12U320_REAL_WIDTH + x1) * 3; block = dst_offset / DATA_BLOCK_CONTENT_SIZE; dst_offset %= DATA_BLOCK_CONTENT_SIZE; if ((dst_offset + len) > DATA_BLOCK_CONTENT_SIZE) { remain = dst_offset + len - DATA_BLOCK_CONTENT_SIZE; len = DATA_BLOCK_CONTENT_SIZE - dst_offset; } dst_offset += DATA_BLOCK_HEADER_SIZE; len /= 3; gm12u320_32bpp_to_24bpp_packed( gm12u320->data_buf[block] + dst_offset, src, len); if (remain) { block++; dst_offset = DATA_BLOCK_HEADER_SIZE; gm12u320_32bpp_to_24bpp_packed( gm12u320->data_buf[block] + dst_offset, src + len * 4, remain / 3); } src += fb->pitches[0]; } drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); put_fb: drm_framebuffer_put(fb); gm12u320->fb_update.fb = NULL; unlock: mutex_unlock(&gm12u320->fb_update.lock); } static void gm12u320_fb_update_work(struct work_struct *work) { struct gm12u320_device *gm12u320 = container_of(to_delayed_work(work), struct gm12u320_device, fb_update.work); struct usb_device *udev = gm12u320_to_usb_device(gm12u320); int block, block_size, len; int ret = 0; gm12u320_copy_fb_to_blocks(gm12u320); for (block = 0; block < GM12U320_BLOCK_COUNT; block++) { if (block == GM12U320_BLOCK_COUNT - 1) block_size = DATA_LAST_BLOCK_SIZE; else block_size = DATA_BLOCK_SIZE; /* Send data command to device */ memcpy(gm12u320->cmd_buf, cmd_data, CMD_SIZE); gm12u320->cmd_buf[8] = block_size & 0xff; gm12u320->cmd_buf[9] = block_size >> 8; gm12u320->cmd_buf[20] = 0xfc - block * 4; gm12u320->cmd_buf[21] = block | (gm12u320->fb_update.frame << 7); ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) goto err; /* Send data block to device */ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->data_buf[block], block_size, &len, DATA_TIMEOUT); if (ret || len != block_size) goto err; /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, DATA_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, CMD_TIMEOUT); if (ret || len != READ_STATUS_SIZE) goto err; } /* Send draw command to device */ memcpy(gm12u320->cmd_buf, cmd_draw, CMD_SIZE); ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) goto err; /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, DATA_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, gm12u320->fb_update.draw_status_timeout); if (ret || len != READ_STATUS_SIZE) goto err; gm12u320->fb_update.draw_status_timeout = CMD_TIMEOUT; gm12u320->fb_update.frame = !gm12u320->fb_update.frame; /* * We must draw a frame every 2s otherwise the projector * switches back to showing its logo. */ queue_delayed_work(system_long_wq, &gm12u320->fb_update.work, msecs_to_jiffies(IDLE_TIMEOUT)); return; err: /* Do not log errors caused by module unload or device unplug */ if (ret != -ENODEV && ret != -ECONNRESET && ret != -ESHUTDOWN) GM12U320_ERR("Frame update error: %d\n", ret); } static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb, const struct iosys_map *map, struct drm_rect *dirty) { struct gm12u320_device *gm12u320 = to_gm12u320(fb->dev); struct drm_framebuffer *old_fb = NULL; bool wakeup = false; mutex_lock(&gm12u320->fb_update.lock); if (gm12u320->fb_update.fb != fb) { old_fb = gm12u320->fb_update.fb; drm_framebuffer_get(fb); gm12u320->fb_update.fb = fb; gm12u320->fb_update.rect = *dirty; gm12u320->fb_update.src_map = *map; wakeup = true; } else { struct drm_rect *rect = &gm12u320->fb_update.rect; rect->x1 = min(rect->x1, dirty->x1); rect->y1 = min(rect->y1, dirty->y1); rect->x2 = max(rect->x2, dirty->x2); rect->y2 = max(rect->y2, dirty->y2); } mutex_unlock(&gm12u320->fb_update.lock); if (wakeup) mod_delayed_work(system_long_wq, &gm12u320->fb_update.work, 0); if (old_fb) drm_framebuffer_put(old_fb); } static void gm12u320_stop_fb_update(struct gm12u320_device *gm12u320) { struct drm_framebuffer *old_fb; cancel_delayed_work_sync(&gm12u320->fb_update.work); mutex_lock(&gm12u320->fb_update.lock); old_fb = gm12u320->fb_update.fb; gm12u320->fb_update.fb = NULL; iosys_map_clear(&gm12u320->fb_update.src_map); mutex_unlock(&gm12u320->fb_update.lock); drm_framebuffer_put(old_fb); } static int gm12u320_set_ecomode(struct gm12u320_device *gm12u320) { return gm12u320_misc_request(gm12u320, MISC_REQ_GET_SET_ECO_A, MISC_REQ_GET_SET_ECO_B, 0x01 /* set */, eco_mode ? 0x01 : 0x00, 0x00, 0x01); } /* ------------------------------------------------------------------ */ /* gm12u320 connector */ /* * We use fake EDID info so that userspace know that it is dealing with * an Acer projector, rather then listing this as an "unknown" monitor. * Note this assumes this driver is only ever used with the Acer C120, if we * add support for other devices the vendor and model should be parameterized. */ static const struct edid gm12u320_edid = { .header = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }, .mfg_id = { 0x04, 0x72 }, /* "ACR" */ .prod_code = { 0x20, 0xc1 }, /* C120h */ .serial = 0xaa55aa55, .mfg_week = 1, .mfg_year = 16, .version = 1, /* EDID 1.3 */ .revision = 3, /* EDID 1.3 */ .input = 0x08, /* Analog input */ .features = 0x0a, /* Pref timing in DTD 1 */ .standard_timings = { { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 } }, .detailed_timings = { { .pixel_clock = 3383, /* hactive = 848, hblank = 256 */ .data.pixel_data.hactive_lo = 0x50, .data.pixel_data.hblank_lo = 0x00, .data.pixel_data.hactive_hblank_hi = 0x31, /* vactive = 480, vblank = 28 */ .data.pixel_data.vactive_lo = 0xe0, .data.pixel_data.vblank_lo = 0x1c, .data.pixel_data.vactive_vblank_hi = 0x10, /* hsync offset 40 pw 128, vsync offset 1 pw 4 */ .data.pixel_data.hsync_offset_lo = 0x28, .data.pixel_data.hsync_pulse_width_lo = 0x80, .data.pixel_data.vsync_offset_pulse_width_lo = 0x14, .data.pixel_data.hsync_vsync_offset_pulse_width_hi = 0x00, /* Digital separate syncs, hsync+, vsync+ */ .data.pixel_data.misc = 0x1e, }, { .pixel_clock = 0, .data.other_data.type = 0xfd, /* Monitor ranges */ .data.other_data.data.range.min_vfreq = 59, .data.other_data.data.range.max_vfreq = 61, .data.other_data.data.range.min_hfreq_khz = 29, .data.other_data.data.range.max_hfreq_khz = 32, .data.other_data.data.range.pixel_clock_mhz = 4, /* 40 MHz */ .data.other_data.data.range.flags = 0, .data.other_data.data.range.formula.cvt = { 0xa0, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }, }, { .pixel_clock = 0, .data.other_data.type = 0xfc, /* Model string */ .data.other_data.data.str.str = { 'P', 'r', 'o', 'j', 'e', 'c', 't', 'o', 'r', '\n', ' ', ' ', ' ' }, }, { .pixel_clock = 0, .data.other_data.type = 0xfe, /* Unspecified text / padding */ .data.other_data.data.str.str = { '\n', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, } }, .checksum = 0x13, }; static int gm12u320_conn_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; int count; drm_edid = drm_edid_alloc(&gm12u320_edid, sizeof(gm12u320_edid)); drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); return count; } static const struct drm_connector_helper_funcs gm12u320_conn_helper_funcs = { .get_modes = gm12u320_conn_get_modes, }; static const struct drm_connector_funcs gm12u320_conn_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; static int gm12u320_conn_init(struct gm12u320_device *gm12u320) { drm_connector_helper_add(&gm12u320->conn, &gm12u320_conn_helper_funcs); return drm_connector_init(&gm12u320->dev, &gm12u320->conn, &gm12u320_conn_funcs, DRM_MODE_CONNECTOR_VGA); } /* ------------------------------------------------------------------ */ /* gm12u320 (simple) display pipe */ static void gm12u320_pipe_enable(struct drm_simple_display_pipe *pipe, struct drm_crtc_state *crtc_state, struct drm_plane_state *plane_state) { struct drm_rect rect = { 0, 0, GM12U320_USER_WIDTH, GM12U320_HEIGHT }; struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); gm12u320->fb_update.draw_status_timeout = FIRST_FRAME_TIMEOUT; gm12u320_fb_mark_dirty(plane_state->fb, &shadow_plane_state->data[0], &rect); } static void gm12u320_pipe_disable(struct drm_simple_display_pipe *pipe) { struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev); gm12u320_stop_fb_update(gm12u320); } static void gm12u320_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state) { struct drm_plane_state *state = pipe->plane.state; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); struct drm_rect rect; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) gm12u320_fb_mark_dirty(state->fb, &shadow_plane_state->data[0], &rect); } static const struct drm_simple_display_pipe_funcs gm12u320_pipe_funcs = { .enable = gm12u320_pipe_enable, .disable = gm12u320_pipe_disable, .update = gm12u320_pipe_update, DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, }; static const uint32_t gm12u320_pipe_formats[] = { DRM_FORMAT_XRGB8888, }; static const uint64_t gm12u320_pipe_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; /* * FIXME: Dma-buf sharing requires DMA support by the importing device. * This function is a workaround to make USB devices work as well. * See todo.rst for how to fix the issue in the dma-buf framework. */ static struct drm_gem_object *gm12u320_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { struct gm12u320_device *gm12u320 = to_gm12u320(dev); if (!gm12u320->dmadev) return ERR_PTR(-ENODEV); return drm_gem_prime_import_dev(dev, dma_buf, gm12u320->dmadev); } DEFINE_DRM_GEM_FOPS(gm12u320_fops); static const struct drm_driver gm12u320_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .fops = &gm12u320_fops, DRM_GEM_SHMEM_DRIVER_OPS, .gem_prime_import = gm12u320_gem_prime_import, DRM_FBDEV_SHMEM_DRIVER_OPS, }; static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; static int gm12u320_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct gm12u320_device *gm12u320; struct drm_device *dev; int ret; /* * The gm12u320 presents itself to the system as 2 usb mass-storage * interfaces, we only care about / need the first one. */ if (interface->cur_altsetting->desc.bInterfaceNumber != 0) return -ENODEV; gm12u320 = devm_drm_dev_alloc(&interface->dev, &gm12u320_drm_driver, struct gm12u320_device, dev); if (IS_ERR(gm12u320)) return PTR_ERR(gm12u320); dev = &gm12u320->dev; gm12u320->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); if (!gm12u320->dmadev) drm_warn(dev, "buffer sharing not supported"); /* not an error */ INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work); mutex_init(&gm12u320->fb_update.lock); ret = drmm_mode_config_init(dev); if (ret) goto err_put_device; dev->mode_config.min_width = GM12U320_USER_WIDTH; dev->mode_config.max_width = GM12U320_USER_WIDTH; dev->mode_config.min_height = GM12U320_HEIGHT; dev->mode_config.max_height = GM12U320_HEIGHT; dev->mode_config.funcs = &gm12u320_mode_config_funcs; ret = gm12u320_usb_alloc(gm12u320); if (ret) goto err_put_device; ret = gm12u320_set_ecomode(gm12u320); if (ret) goto err_put_device; ret = gm12u320_conn_init(gm12u320); if (ret) goto err_put_device; ret = drm_simple_display_pipe_init(&gm12u320->dev, &gm12u320->pipe, &gm12u320_pipe_funcs, gm12u320_pipe_formats, ARRAY_SIZE(gm12u320_pipe_formats), gm12u320_pipe_modifiers, &gm12u320->conn); if (ret) goto err_put_device; drm_mode_config_reset(dev); usb_set_intfdata(interface, dev); ret = drm_dev_register(dev, 0); if (ret) goto err_put_device; drm_client_setup(dev, NULL); return 0; err_put_device: put_device(gm12u320->dmadev); return ret; } static void gm12u320_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); struct gm12u320_device *gm12u320 = to_gm12u320(dev); put_device(gm12u320->dmadev); gm12u320->dmadev = NULL; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); } static int gm12u320_suspend(struct usb_interface *interface, pm_message_t message) { struct drm_device *dev = usb_get_intfdata(interface); return drm_mode_config_helper_suspend(dev); } static int gm12u320_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); struct gm12u320_device *gm12u320 = to_gm12u320(dev); gm12u320_set_ecomode(gm12u320); return drm_mode_config_helper_resume(dev); } static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1de1, 0xc102) }, {}, }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver gm12u320_usb_driver = { .name = "gm12u320", .probe = gm12u320_usb_probe, .disconnect = gm12u320_usb_disconnect, .id_table = id_table, .suspend = pm_ptr(gm12u320_suspend), .resume = pm_ptr(gm12u320_resume), .reset_resume = pm_ptr(gm12u320_resume), }; module_usb_driver(gm12u320_usb_driver); MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("GM12U320 driver for USB projectors"); MODULE_LICENSE("GPL");
1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 12 1 12 12 12 12 2 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. */ #include <linux/skbuff.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_bonding.h> #include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/bonding.h> #include <net/bond_3ad.h> #include <net/netlink.h> /* General definitions */ #define AD_SHORT_TIMEOUT 1 #define AD_LONG_TIMEOUT 0 #define AD_STANDBY 0x2 #define AD_MAX_TX_IN_SECOND 3 #define AD_COLLECTOR_MAX_DELAY 0 /* Timer definitions (43.4.4 in the 802.3ad standard) */ #define AD_FAST_PERIODIC_TIME 1 #define AD_SLOW_PERIODIC_TIME 30 #define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME) #define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME) #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ #define AD_PORT_BEGIN 0x1 #define AD_PORT_LACP_ENABLED 0x2 #define AD_PORT_ACTOR_CHURN 0x4 #define AD_PORT_PARTNER_CHURN 0x8 #define AD_PORT_READY 0x10 #define AD_PORT_READY_N 0x20 #define AD_PORT_MATCHED 0x40 #define AD_PORT_STANDBY 0x80 #define AD_PORT_SELECTED 0x100 #define AD_PORT_MOVED 0x200 #define AD_PORT_CHURNED (AD_PORT_ACTOR_CHURN | AD_PORT_PARTNER_CHURN) /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) * -------------------------------------------------------------- * Port key | User key (10 bits) | Speed (5 bits) | Duplex| * -------------------------------------------------------------- * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E #define AD_USER_KEY_MASKS 0xFFC0 enum ad_link_speed_type { AD_LINK_SPEED_1MBPS = 1, AD_LINK_SPEED_10MBPS, AD_LINK_SPEED_100MBPS, AD_LINK_SPEED_1000MBPS, AD_LINK_SPEED_2500MBPS, AD_LINK_SPEED_5000MBPS, AD_LINK_SPEED_10000MBPS, AD_LINK_SPEED_14000MBPS, AD_LINK_SPEED_20000MBPS, AD_LINK_SPEED_25000MBPS, AD_LINK_SPEED_40000MBPS, AD_LINK_SPEED_50000MBPS, AD_LINK_SPEED_56000MBPS, AD_LINK_SPEED_100000MBPS, AD_LINK_SPEED_200000MBPS, AD_LINK_SPEED_400000MBPS, AD_LINK_SPEED_800000MBPS, }; /* compare MAC addresses */ #define MAC_ADDRESS_EQUAL(A, B) \ ether_addr_equal_64bits((const u8 *)A, (const u8 *)B) static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); static void ad_periodic_machine(struct port *port, struct bond_params *bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_port(struct port *port, int lacp_fast); static void ad_enable_collecting(struct port *port); static void ad_disable_distributing(struct port *port, bool *update_slave_arr); static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); static void ad_marker_response_received(struct bond_marker *marker, struct port *port); static void ad_update_actor_keys(struct port *port, bool reset); /* ================= api to bonding and kernel code ================== */ /** * __get_bond_by_port - get the port's bonding struct * @port: the port we're looking at * * Return @port's bonding struct, or %NULL if it can't be found. */ static inline struct bonding *__get_bond_by_port(struct port *port) { if (port->slave == NULL) return NULL; return bond_get_bond_by_slave(port->slave); } /** * __get_first_agg - get the first aggregator in the bond * @port: the port we're looking at * * Return the aggregator of the first slave in @bond, or %NULL if it can't be * found. * The caller must hold RCU or RTNL lock. */ static inline struct aggregator *__get_first_agg(struct port *port) { struct bonding *bond = __get_bond_by_port(port); struct slave *first_slave; struct aggregator *agg; /* If there's no bond for this port, or bond has no slaves */ if (bond == NULL) return NULL; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; rcu_read_unlock(); return agg; } /** * __agg_has_partner - see if we have a partner * @agg: the agregator we're looking at * * Return nonzero if aggregator has a partner (denoted by a non-zero ether * address for the partner). Return 0 if not. */ static inline int __agg_has_partner(struct aggregator *agg) { return !is_zero_ether_addr(agg->partner_system.mac_addr_value); } /** * __disable_distributing_port - disable the port's slave for distributing. * Port will still be able to collect. * @port: the port we're looking at * * This will disable only distributing on the port's slave. */ static void __disable_distributing_port(struct port *port) { bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_collecting_port - enable the port's slave for collecting, * if it's up * @port: the port we're looking at * * This will enable only collecting on the port's slave. */ static void __enable_collecting_port(struct port *port) { struct slave *slave = port->slave; if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave)) bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __disable_port - disable the port's slave * @port: the port we're looking at * * This will disable both collecting and distributing on the port's slave. */ static inline void __disable_port(struct port *port) { bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_port - enable the port's slave, if it's up * @port: the port we're looking at * * This will enable both collecting and distributing on the port's slave. */ static inline void __enable_port(struct port *port) { struct slave *slave = port->slave; if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __port_move_to_attached_state - check if port should transition back to attached * state. * @port: the port we're looking at */ static bool __port_move_to_attached_state(struct port *port) { if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) port->sm_mux_state = AD_MUX_ATTACHED; return port->sm_mux_state == AD_MUX_ATTACHED; } /** * __port_is_collecting_distributing - check if the port's slave is in the * combined collecting/distributing state * @port: the port we're looking at */ static int __port_is_collecting_distributing(struct port *port) { return bond_is_active_slave(port->slave); } /** * __get_agg_selection_mode - get the aggregator selection mode * @port: the port we're looking at * * Get the aggregator selection mode. Can be %STABLE, %BANDWIDTH or %COUNT. */ static inline u32 __get_agg_selection_mode(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return BOND_AD_STABLE; return bond->params.ad_select; } /** * __check_agg_selection_timer - check if the selection timer has expired * @port: the port we're looking at */ static inline int __check_agg_selection_timer(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return 0; return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** * __get_link_speed - get a port's speed * @port: the port we're looking at * * Return @port's speed in 802.3ad enum format. i.e. one of: * 0, * %AD_LINK_SPEED_10MBPS, * %AD_LINK_SPEED_100MBPS, * %AD_LINK_SPEED_1000MBPS, * %AD_LINK_SPEED_2500MBPS, * %AD_LINK_SPEED_5000MBPS, * %AD_LINK_SPEED_10000MBPS * %AD_LINK_SPEED_14000MBPS, * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_25000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_50000MBPS * %AD_LINK_SPEED_56000MBPS * %AD_LINK_SPEED_100000MBPS * %AD_LINK_SPEED_200000MBPS * %AD_LINK_SPEED_400000MBPS * %AD_LINK_SPEED_800000MBPS */ static u16 __get_link_speed(struct port *port) { struct slave *slave = port->slave; u16 speed; /* this if covers only a special case: when the configuration starts * with link down, it sets the speed to 0. * This is done in spite of the fact that the e100 driver reports 0 * to be compatible with MVT in the future. */ if (slave->link != BOND_LINK_UP) speed = 0; else { switch (slave->speed) { case SPEED_10: speed = AD_LINK_SPEED_10MBPS; break; case SPEED_100: speed = AD_LINK_SPEED_100MBPS; break; case SPEED_1000: speed = AD_LINK_SPEED_1000MBPS; break; case SPEED_2500: speed = AD_LINK_SPEED_2500MBPS; break; case SPEED_5000: speed = AD_LINK_SPEED_5000MBPS; break; case SPEED_10000: speed = AD_LINK_SPEED_10000MBPS; break; case SPEED_14000: speed = AD_LINK_SPEED_14000MBPS; break; case SPEED_20000: speed = AD_LINK_SPEED_20000MBPS; break; case SPEED_25000: speed = AD_LINK_SPEED_25000MBPS; break; case SPEED_40000: speed = AD_LINK_SPEED_40000MBPS; break; case SPEED_50000: speed = AD_LINK_SPEED_50000MBPS; break; case SPEED_56000: speed = AD_LINK_SPEED_56000MBPS; break; case SPEED_100000: speed = AD_LINK_SPEED_100000MBPS; break; case SPEED_200000: speed = AD_LINK_SPEED_200000MBPS; break; case SPEED_400000: speed = AD_LINK_SPEED_400000MBPS; break; case SPEED_800000: speed = AD_LINK_SPEED_800000MBPS; break; default: /* unknown speed value from ethtool. shouldn't happen */ if (slave->speed != SPEED_UNKNOWN) pr_err_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", slave->bond->dev->name, slave->dev->name, slave->speed, port->actor_port_number); speed = 0; break; } } slave_dbg(slave->bond->dev, slave->dev, "Port %d Received link speed %d update from adapter\n", port->actor_port_number, speed); return speed; } /** * __get_duplex - get a port's duplex * @port: the port we're looking at * * Return @port's duplex in 802.3ad bitmask format. i.e.: * 0x01 if in full duplex * 0x00 otherwise */ static u8 __get_duplex(struct port *port) { struct slave *slave = port->slave; u8 retval = 0x0; /* handling a special case: when the configuration starts with * link down, it sets the duplex to 0. */ if (slave->link == BOND_LINK_UP) { switch (slave->duplex) { case DUPLEX_FULL: retval = 0x1; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status full duplex update from adapter\n", port->actor_port_number); break; case DUPLEX_HALF: default: retval = 0x0; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status NOT full duplex update from adapter\n", port->actor_port_number); break; } } return retval; } static void __ad_actor_update_port(struct port *port) { const struct bonding *bond = bond_get_bond_by_slave(port->slave); port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; } /* Conversions */ /** * __ad_timer_to_ticks - convert a given timer type to AD module ticks * @timer_type: which timer to operate * @par: timer parameter. see below * * If @timer_type is %current_while_timer, @par indicates long/short timer. * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, * %SLOW_PERIODIC_TIME. */ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) { u16 retval = 0; /* to silence the compiler */ switch (timer_type) { case AD_CURRENT_WHILE_TIMER: /* for rx machine usage */ if (par) retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); else retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); break; case AD_ACTOR_CHURN_TIMER: /* for local churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_PERIODIC_TIMER: /* for periodic machine */ retval = (par*ad_ticks_per_sec); /* long timeout */ break; case AD_PARTNER_CHURN_TIMER: /* for remote churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_WAIT_WHILE_TIMER: /* for selection machine */ retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); break; } return retval; } /* ================= ad_rx_machine helper functions ================== */ /** * __choose_matched - update a port's matched variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the matched variable, using parameter values from a * newly received lacpdu. Parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the actor. Matched is set to TRUE if all of these parameters * match and the PDU parameter partner_state.aggregation has the same value as * actor_oper_port_state.aggregation and lacp will actively maintain the link * in the aggregation. Matched is also set to TRUE if the value of * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates * an individual link and lacp will actively maintain the link. Otherwise, * matched is set to FALSE. LACP is considered to be actively maintaining the * link if either the PDU's actor_state.lacp_activity variable is TRUE or both * the actor's actor_oper_port_state.lacp_activity and the PDU's * partner_state.lacp_activity variables are TRUE. * * Note: the AD_PORT_MATCHED "variable" is not specified by 802.3ad; it is * used here to implement the language from 802.3ad 43.4.9 that requires * recordPDU to "match" the LACPDU parameters to the stored values. */ static void __choose_matched(struct lacpdu *lacpdu, struct port *port) { /* check if all parameters are alike * or this is individual link(aggregation == FALSE) * then update the state machine Matched variable. */ if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) && (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) || ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0) ) { port->sm_vars |= AD_PORT_MATCHED; } else { port->sm_vars &= ~AD_PORT_MATCHED; } } /** * __record_pdu - record parameters from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Record the parameter values for the Actor carried in a received lacpdu as * the current partner operational parameter values and sets * actor_oper_port_state.defaulted to FALSE. */ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { struct port_params *partner = &port->partner_oper; __choose_matched(lacpdu, port); /* record the new parameter values for the partner * operational */ partner->port_number = ntohs(lacpdu->actor_port); partner->port_priority = ntohs(lacpdu->actor_port_priority); partner->system = lacpdu->actor_system; partner->system_priority = ntohs(lacpdu->actor_system_priority); partner->key = ntohs(lacpdu->actor_key); partner->port_state = lacpdu->actor_state; /* set actor_oper_port_state.defaulted to FALSE */ port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED; /* set the partner sync. to on if the partner is sync, * and the port is matched */ if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) { partner->port_state |= LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=1\n"); } else { partner->port_state &= ~LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=0\n"); } } } /** * __record_default - record default parameters * @port: the port we're looking at * * This function records the default parameter values for the partner carried * in the Partner Admin parameters as the current partner operational parameter * values and sets actor_oper_port_state.defaulted to TRUE. */ static void __record_default(struct port *port) { if (port) { /* record the partner admin parameters */ memcpy(&port->partner_oper, &port->partner_admin, sizeof(struct port_params)); /* set actor_oper_port_state.defaulted to true */ port->actor_oper_port_state |= LACP_STATE_DEFAULTED; } } /** * __update_selected - update a port's Selected variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the selected variable, using parameter values from a * newly received lacpdu. The parameter values for the Actor carried in the * received PDU are compared with the corresponding operational parameter * values for the ports partner. If one or more of the comparisons shows that * the value(s) received in the PDU differ from the current operational values, * then selected is set to FALSE and actor_oper_port_state.synchronization is * set to out_of_sync. Otherwise, selected remains unchanged. */ static void __update_selected(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { const struct port_params *partner = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (ntohs(lacpdu->actor_port) != partner->port_number || ntohs(lacpdu->actor_port_priority) != partner->port_priority || !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) || ntohs(lacpdu->actor_system_priority) != partner->system_priority || ntohs(lacpdu->actor_key) != partner->key || (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_default_selected - update a port's Selected variable from Partner * @port: the port we're looking at * * This function updates the value of the selected variable, using the partner * administrative parameter values. The administrative values are compared with * the corresponding operational parameter values for the partner. If one or * more of the comparisons shows that the administrative value(s) differ from * the current operational values, then Selected is set to FALSE and * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, * Selected remains unchanged. */ static void __update_default_selected(struct port *port) { if (port) { const struct port_params *admin = &port->partner_admin; const struct port_params *oper = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (admin->port_number != oper->port_number || admin->port_priority != oper->port_priority || !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) || admin->system_priority != oper->system_priority || admin->key != oper->key || (admin->port_state & LACP_STATE_AGGREGATION) != (oper->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_ntt - update a port's ntt variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Updates the value of the ntt variable, using parameter values from a newly * received lacpdu. The parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the Actor. If one or more of the comparisons shows that the * value(s) received in the PDU differ from the current operational values, * then ntt is set to TRUE. Otherwise, ntt remains unchanged. */ static void __update_ntt(struct lacpdu *lacpdu, struct port *port) { /* validate lacpdu and port */ if (lacpdu && port) { /* check if any parameter is different then * update the port->ntt. */ if ((ntohs(lacpdu->partner_port) != port->actor_port_number) || (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) || (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) || (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) || ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) || ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) || ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) || ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION)) ) { port->ntt = true; } } } /** * __agg_ports_are_ready - check if all ports in an aggregator are ready * @aggregator: the aggregator we're looking at * */ static int __agg_ports_are_ready(struct aggregator *aggregator) { struct port *port; int retval = 1; if (aggregator) { /* scan all ports in this aggregator to verfy if they are * all ready. */ for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (!(port->sm_vars & AD_PORT_READY_N)) { retval = 0; break; } } } return retval; } /** * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator * @aggregator: the aggregator we're looking at * @val: Should the ports' ready bit be set on or off * */ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) { struct port *port; for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (val) port->sm_vars |= AD_PORT_READY; else port->sm_vars &= ~AD_PORT_READY; } } static int __agg_active_ports(struct aggregator *agg) { struct port *port; int active = 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (port->is_enabled) active++; } return active; } /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at * */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: bandwidth = nports * 2500; break; case AD_LINK_SPEED_5000MBPS: bandwidth = nports * 5000; break; case AD_LINK_SPEED_10000MBPS: bandwidth = nports * 10000; break; case AD_LINK_SPEED_14000MBPS: bandwidth = nports * 14000; break; case AD_LINK_SPEED_20000MBPS: bandwidth = nports * 20000; break; case AD_LINK_SPEED_25000MBPS: bandwidth = nports * 25000; break; case AD_LINK_SPEED_40000MBPS: bandwidth = nports * 40000; break; case AD_LINK_SPEED_50000MBPS: bandwidth = nports * 50000; break; case AD_LINK_SPEED_56000MBPS: bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: bandwidth = nports * 100000; break; case AD_LINK_SPEED_200000MBPS: bandwidth = nports * 200000; break; case AD_LINK_SPEED_400000MBPS: bandwidth = nports * 400000; break; case AD_LINK_SPEED_800000MBPS: bandwidth = nports * 800000; break; default: bandwidth = 0; /* to silence the compiler */ } } return bandwidth; } /** * __get_active_agg - get the current active aggregator * @aggregator: the aggregator we're looking at * * Caller must hold RCU lock. */ static struct aggregator *__get_active_agg(struct aggregator *aggregator) { struct bonding *bond = aggregator->slave->bond; struct list_head *iter; struct slave *slave; bond_for_each_slave_rcu(bond, slave, iter) if (SLAVE_AD_INFO(slave)->aggregator.is_active) return &(SLAVE_AD_INFO(slave)->aggregator); return NULL; } /** * __update_lacpdu_from_port - update a port's lacpdu fields * @port: the port we're looking at */ static inline void __update_lacpdu_from_port(struct port *port) { struct lacpdu *lacpdu = &port->lacpdu; const struct port_params *partner = &port->partner_oper; /* update current actual Actor parameters * lacpdu->subtype initialized * lacpdu->version_number initialized * lacpdu->tlv_type_actor_info initialized * lacpdu->actor_information_length initialized */ lacpdu->actor_system_priority = htons(port->actor_system_priority); lacpdu->actor_system = port->actor_system; lacpdu->actor_key = htons(port->actor_oper_port_key); lacpdu->actor_port_priority = htons(port->actor_port_priority); lacpdu->actor_port = htons(port->actor_port_number); lacpdu->actor_state = port->actor_oper_port_state; slave_dbg(port->slave->bond->dev, port->slave->dev, "update lacpdu: actor port state %x\n", port->actor_oper_port_state); /* lacpdu->reserved_3_1 initialized * lacpdu->tlv_type_partner_info initialized * lacpdu->partner_information_length initialized */ lacpdu->partner_system_priority = htons(partner->system_priority); lacpdu->partner_system = partner->system; lacpdu->partner_key = htons(partner->key); lacpdu->partner_port_priority = htons(partner->port_priority); lacpdu->partner_port = htons(partner->port_number); lacpdu->partner_state = partner->port_state; /* lacpdu->reserved_3_2 initialized * lacpdu->tlv_type_collector_info initialized * lacpdu->collector_information_length initialized * collector_max_delay initialized * reserved_12[12] initialized * tlv_type_terminator initialized * terminator_length initialized * reserved_50[50] initialized */ } /* ================= main 802.3ad protocol code ========================= */ /** * ad_lacpdu_send - send out a lacpdu packet on a given port * @port: the port we're looking at * * Returns: 0 on success * < 0 on error */ static int ad_lacpdu_send(struct port *port) { struct slave *slave = port->slave; struct sk_buff *skb; struct lacpdu_header *lacpdu_header; int length = sizeof(struct lacpdu_header); skb = dev_alloc_skb(length); if (!skb) return -ENOMEM; atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; skb->priority = TC_PRIO_CONTROL; lacpdu_header = skb_put(skb, length); ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback lacpdus in receive. */ ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr); lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; lacpdu_header->lacpdu = port->lacpdu; dev_queue_xmit(skb); return 0; } /** * ad_marker_send - send marker information/response on a given port * @port: the port we're looking at * @marker: marker data to send * * Returns: 0 on success * < 0 on error */ static int ad_marker_send(struct port *port, struct bond_marker *marker) { struct slave *slave = port->slave; struct sk_buff *skb; struct bond_marker_header *marker_header; int length = sizeof(struct bond_marker_header); skb = dev_alloc_skb(length + 16); if (!skb) return -ENOMEM; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx); break; case AD_MARKER_RESPONSE_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx); break; } skb_reserve(skb, 16); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; marker_header = skb_put(skb, length); ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback MARKERs in receive. */ ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr); marker_header->hdr.h_proto = PKT_TYPE_LACPDU; marker_header->marker = *marker; dev_queue_xmit(skb); return 0; } /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_mux_machine(struct port *port, bool *update_slave_arr) { struct bonding *bond = __get_bond_by_port(port); mux_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_mux_state; if (port->sm_vars & AD_PORT_BEGIN) { port->sm_mux_state = AD_MUX_DETACHED; } else { switch (port->sm_mux_state) { case AD_MUX_DETACHED: if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) /* if SELECTED or STANDBY */ port->sm_mux_state = AD_MUX_WAITING; break; case AD_MUX_WAITING: /* if SELECTED == FALSE return to DETACH state */ if (!(port->sm_vars & AD_PORT_SELECTED)) { port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the Selection Logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; break; } /* check if the wait_while_timer expired */ if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) port->sm_vars |= AD_PORT_READY_N; /* in order to withhold the selection logic to check * all ports READY_N value every callback cycle to * update ready variable, we check READY_N and update * READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); /* if the wait_while_timer expired, and the port is * in READY state, move to ATTACHED state */ if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) port->sm_mux_state = AD_MUX_ATTACHED; break; case AD_MUX_ATTACHED: /* check also if agg_select_timer expired (so the * edable port will take place only after this timer) */ if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { if (port->aggregator->is_active) { int state = AD_MUX_COLLECTING_DISTRIBUTING; if (!bond->params.coupled_control) state = AD_MUX_COLLECTING; port->sm_mux_state = state; } } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { /* if UNSELECTED or STANDBY */ port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the selection logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; } else if (port->aggregator->is_active) { port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; } break; case AD_MUX_COLLECTING_DISTRIBUTING: if (!__port_move_to_attached_state(port)) { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; case AD_MUX_COLLECTING: if (!__port_move_to_attached_state(port)) { if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && (port->partner_oper.port_state & LACP_STATE_COLLECTING)) { port->sm_mux_state = AD_MUX_DISTRIBUTING; } else { /* If port state hasn't changed, make sure that a collecting * port is enabled for an active aggregator. */ struct slave *slave = port->slave; if (port->aggregator->is_active && bond_is_slave_rx_disabled(slave)) { ad_enable_collecting(port); *update_slave_arr = true; } } } break; case AD_MUX_DISTRIBUTING: if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_COLLECTING) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { port->sm_mux_state = AD_MUX_COLLECTING; } else { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator && port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; default: break; } } /* check if the state machine was changed */ if (port->sm_mux_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Mux Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_mux_state); switch (port->sm_mux_state) { case AD_MUX_DETACHED: port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; ad_disable_collecting_distributing(port, update_slave_arr); port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->ntt = true; break; case AD_MUX_WAITING: port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); break; case AD_MUX_ATTACHED: if (port->aggregator->is_active) port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; else port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; ad_disable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting(port); ad_disable_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); break; default: break; } } } /** * ad_rx_machine - handle a port's rx State Machine * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * If lacpdu arrived, stop previous timer (if exists) and set the next state as * CURRENT. If timer expired set the state machine in the proper state. * In other cases, this function checks if we need to switch to other state. */ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) { rx_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_rx_state; if (lacpdu) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx); } /* check if state machine should change state */ /* first, check if port was reinitialized */ if (port->sm_vars & AD_PORT_BEGIN) { port->sm_rx_state = AD_RX_INITIALIZE; port->sm_vars |= AD_PORT_CHURNED; /* check if port is not enabled */ } else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled) port->sm_rx_state = AD_RX_PORT_DISABLED; /* check if new lacpdu arrived */ else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { if (port->sm_rx_state != AD_RX_CURRENT) port->sm_vars |= AD_PORT_CHURNED; port->sm_rx_timer_counter = 0; port->sm_rx_state = AD_RX_CURRENT; } else { /* if timer is on, and if it is expired */ if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { switch (port->sm_rx_state) { case AD_RX_EXPIRED: port->sm_rx_state = AD_RX_DEFAULTED; break; case AD_RX_CURRENT: port->sm_rx_state = AD_RX_EXPIRED; break; default: break; } } else { /* if no lacpdu arrived and no timer is on */ switch (port->sm_rx_state) { case AD_RX_PORT_DISABLED: if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) port->sm_rx_state = AD_RX_EXPIRED; else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) port->sm_rx_state = AD_RX_LACP_DISABLED; break; default: break; } } } /* check if the State machine was changed or new lacpdu arrived */ if ((port->sm_rx_state != last_state) || (lacpdu)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Rx Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_rx_state); switch (port->sm_rx_state) { case AD_RX_INITIALIZE: if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)) port->sm_vars &= ~AD_PORT_LACP_ENABLED; else port->sm_vars |= AD_PORT_LACP_ENABLED; port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; port->sm_rx_state = AD_RX_PORT_DISABLED; fallthrough; case AD_RX_PORT_DISABLED: port->sm_vars &= ~AD_PORT_MATCHED; break; case AD_RX_LACP_DISABLED: port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION; port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_EXPIRED: /* Reset of the Synchronization flag (Standard 43.4.12) * This reset cause to disable this port in the * COLLECTING_DISTRIBUTING state of the mux machine in * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. */ port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; break; case AD_RX_DEFAULTED: __update_default_selected(port); __record_default(port); port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_CURRENT: /* detect loopback situation */ if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system), &(port->actor_system))) { slave_err(port->slave->bond->dev, port->slave->dev, "An illegal loopback occurred on slave\n" "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n"); return; } __update_selected(lacpdu, port); __update_ntt(lacpdu, port); __record_pdu(lacpdu, port); port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; default: break; } } } /** * ad_churn_machine - handle port churn's state machine * @port: the port we're looking at * */ static void ad_churn_machine(struct port *port) { if (port->sm_vars & AD_PORT_CHURNED) { port->sm_vars &= ~AD_PORT_CHURNED; port->sm_churn_actor_state = AD_CHURN_MONITOR; port->sm_churn_partner_state = AD_CHURN_MONITOR; port->sm_churn_actor_timer_counter = __ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0); port->sm_churn_partner_timer_counter = __ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0); return; } if (port->sm_churn_actor_timer_counter && !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_actor_state = AD_NO_CHURN; } else { port->churn_actor_count++; port->sm_churn_actor_state = AD_CHURN; } } if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_partner_state = AD_NO_CHURN; } else { port->churn_partner_count++; port->sm_churn_partner_state = AD_CHURN; } } } /** * ad_tx_machine - handle a port's tx state machine * @port: the port we're looking at */ static void ad_tx_machine(struct port *port) { /* check if tx timer expired, to verify that we do not send more than * 3 packets per second */ if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { /* check if there is something to send */ if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { __update_lacpdu_from_port(port); if (ad_lacpdu_send(port) >= 0) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent LACPDU on port %d\n", port->actor_port_number); /* mark ntt as false, so it will not be sent * again until demanded */ port->ntt = false; } } /* restart tx timer(to verify that we will not exceed * AD_MAX_TX_IN_SECOND */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; } } /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ static void ad_periodic_machine(struct port *port, struct bond_params *bond_params) { periodic_states_t last_state; /* keep current state machine state to compare later if it was changed */ last_state = port->sm_periodic_state; /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || !bond_params->lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ else if (port->sm_periodic_timer_counter) { /* check if periodic state machine expired */ if (!(--port->sm_periodic_timer_counter)) { /* if expired then do tx */ port->sm_periodic_state = AD_PERIODIC_TX; } else { /* If not expired, check if there is some new timeout * parameter from the partner state */ switch (port->sm_periodic_state) { case AD_FAST_PERIODIC: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; break; case AD_SLOW_PERIODIC: if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) { port->sm_periodic_timer_counter = 0; port->sm_periodic_state = AD_PERIODIC_TX; } break; default: break; } } } else { switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_state = AD_FAST_PERIODIC; break; case AD_PERIODIC_TX: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; else port->sm_periodic_state = AD_FAST_PERIODIC; break; default: break; } } /* check if the state machine was changed */ if (port->sm_periodic_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_periodic_state); switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_timer_counter = 0; break; case AD_FAST_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; break; case AD_SLOW_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; break; case AD_PERIODIC_TX: port->ntt = true; break; default: break; } } } /** * ad_port_selection_logic - select aggregation groups * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Select aggregation groups, and assign each port for it's aggregetor. The * selection logic is called in the inititalization (after all the handshkes), * and after every lacpdu receive (if selected is off). */ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; struct list_head *iter; struct bonding *bond; struct slave *slave; int found = 0; /* if the port is already Selected, do nothing */ if (port->sm_vars & AD_PORT_SELECTED) return; bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ if (port->aggregator) { /* detach the port from its former aggregator */ temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { if (curr_port == port) { temp_aggregator->num_of_ports--; /* if it is the first port attached to the * aggregator */ if (!last_port) { temp_aggregator->lag_ports = port->next_port_in_aggregator; } else { /* not the first port attached to the * aggregator */ last_port->next_port_in_aggregator = port->next_port_in_aggregator; } /* clear the port's relations to this * aggregator */ port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 0; slave_dbg(bond->dev, port->slave->dev, "Port %d left LAG %d\n", port->actor_port_number, temp_aggregator->aggregator_identifier); /* if the aggregator is empty, clear its * parameters, and set it ready to be attached */ if (!temp_aggregator->lag_ports) ad_clear_agg(temp_aggregator); break; } } if (!curr_port) { /* meaning: the port was related to an aggregator * but was not on the aggregator port list */ net_warn_ratelimited("%s: (slave %s): Warning: Port %d was related to aggregator %d but was not on its port list\n", port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, port->aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ bond_for_each_slave(bond, slave, iter) { aggregator = &(SLAVE_AD_INFO(slave)->aggregator); /* keep a free aggregator for later use(if needed) */ if (!aggregator->lag_ports) { if (!free_aggregator) free_aggregator = aggregator; continue; } /* check if current aggregator suits us */ if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) && (aggregator->partner_system_priority == port->partner_oper.system_priority) && (aggregator->partner_oper_aggregator_key == port->partner_oper.key) ) && ((__agg_has_partner(aggregator) && /* partner answers */ !aggregator->is_individual) /* but is not individual OR */ ) ) { /* attach to the founded aggregator */ port->aggregator = aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; port->aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; found = 1; break; } } /* the port couldn't find an aggregator - attach it to a new * aggregator */ if (!found) { if (free_aggregator) { /* assign port a new aggregator */ port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ port->aggregator->is_individual = false; else port->aggregator->is_individual = true; port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; port->aggregator->partner_system = port->partner_oper.system; port->aggregator->partner_system_priority = port->partner_oper.system_priority; port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; port->aggregator->receive_state = 1; port->aggregator->transmit_state = 1; port->aggregator->lag_ports = port; port->aggregator->num_of_ports++; /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, update_slave_arr); if (!port->aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } /* Decide if "agg" is a better choice for the new active aggregator that * the current best, according to the ad_select policy. */ static struct aggregator *ad_agg_selection_test(struct aggregator *best, struct aggregator *curr) { /* 0. If no best, select current. * * 1. If the current agg is not individual, and the best is * individual, select current. * * 2. If current agg is individual and the best is not, keep best. * * 3. Therefore, current and best are both individual or both not * individual, so: * * 3a. If current agg partner replied, and best agg partner did not, * select current. * * 3b. If current agg partner did not reply and best agg partner * did reply, keep best. * * 4. Therefore, current and best both have partner replies or * both do not, so perform selection policy: * * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) return curr; if (!curr->is_individual && best->is_individual) return curr; if (curr->is_individual && !best->is_individual) return best; if (__agg_has_partner(curr) && !__agg_has_partner(best)) return curr; if (!__agg_has_partner(curr) && __agg_has_partner(best)) return best; switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; fallthrough; case BOND_AD_STABLE: case BOND_AD_BANDWIDTH: if (__get_agg_bandwidth(curr) > __get_agg_bandwidth(best)) return curr; break; default: net_warn_ratelimited("%s: (slave %s): Impossible agg select mode %d\n", curr->slave->bond->dev->name, curr->slave->dev->name, __get_agg_selection_mode(curr->lag_ports)); break; } return best; } static int agg_device_up(const struct aggregator *agg) { struct port *port = agg->lag_ports; if (!port) return 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (netif_running(port->slave->dev) && netif_carrier_ok(port->slave->dev)) return 1; } return 0; } /** * ad_agg_selection_logic - select an aggregation group for a team * @agg: the aggregator we're looking at * @update_slave_arr: Does slave array need update? * * It is assumed that only one aggregator may be selected for a team. * * The logic of this function is to select the aggregator according to * the ad_select policy: * * BOND_AD_STABLE: select the aggregator with the most ports attached to * it, and to reselect the active aggregator only if the previous * aggregator has no more ports related to it. * * BOND_AD_BANDWIDTH: select the aggregator with the highest total * bandwidth, and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * BOND_AD_COUNT: select the aggregator with largest number of ports * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. */ static void ad_agg_selection_logic(struct aggregator *agg, bool *update_slave_arr) { struct aggregator *best, *active, *origin; struct bonding *bond = agg->slave->bond; struct list_head *iter; struct slave *slave; struct port *port; rcu_read_lock(); origin = agg; active = __get_active_agg(agg); best = (active && agg_device_up(active)) ? active : NULL; bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); agg->is_active = 0; if (__agg_active_ports(agg) && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } if (best && __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { /* For the STABLE policy, don't replace the old active * aggregator if it's still active (it has an answering * partner) or if both the best and active don't have an * answering partner. */ if (active && active->lag_ports && __agg_active_ports(active) && (__agg_has_partner(active) || (!__agg_has_partner(active) && !__agg_has_partner(best)))) { if (!(!active->actor_oper_aggregator_key && best->actor_oper_aggregator_key)) { best = NULL; active->is_active = 1; } } } if (best && (best == active)) { best = NULL; active->is_active = 1; } /* if there is new best aggregator, activate it */ if (best) { netdev_dbg(bond->dev, "(slave %s): best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); netdev_dbg(bond->dev, "(slave %s): best ports %p slave %p\n", best->slave ? best->slave->dev->name : "NULL", best->lag_ports, best->slave); bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); slave_dbg(bond->dev, slave->dev, "Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", agg->aggregator_identifier, agg->num_of_ports, agg->actor_oper_aggregator_key, agg->partner_oper_aggregator_key, agg->is_individual, agg->is_active); } /* check if any partner replies */ if (best->is_individual) net_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", bond->dev->name); best->is_active = 1; netdev_dbg(bond->dev, "(slave %s): LAG %d chosen as the active LAG\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier); netdev_dbg(bond->dev, "(slave %s): Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); /* disable the ports that were related to the former * active_aggregator */ if (active) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __disable_port(port); } } /* Slave array needs update. */ *update_slave_arr = true; } /* if the selected aggregator is of join individuals * (partner_system is NULL), enable their ports */ active = __get_active_agg(origin); if (active) { if (!__agg_has_partner(active)) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __enable_port(port); } *update_slave_arr = true; } } rcu_read_unlock(); bond_3ad_set_carrier(bond); } /** * ad_clear_agg - clear a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_clear_agg(struct aggregator *aggregator) { if (aggregator) { aggregator->is_individual = false; aggregator->actor_admin_aggregator_key = 0; aggregator->actor_oper_aggregator_key = 0; eth_zero_addr(aggregator->partner_system.mac_addr_value); aggregator->partner_system_priority = 0; aggregator->partner_oper_aggregator_key = 0; aggregator->receive_state = 0; aggregator->transmit_state = 0; aggregator->lag_ports = NULL; aggregator->is_active = 0; aggregator->num_of_ports = 0; pr_debug("%s: LAG %d was cleared\n", aggregator->slave ? aggregator->slave->dev->name : "NULL", aggregator->aggregator_identifier); } } /** * ad_initialize_agg - initialize a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_initialize_agg(struct aggregator *aggregator) { if (aggregator) { ad_clear_agg(aggregator); eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value); aggregator->aggregator_identifier = 0; aggregator->slave = NULL; } } /** * ad_initialize_port - initialize a given port's parameters * @port: the port we're looking at * @lacp_fast: boolean. whether fast periodic should be used */ static void ad_initialize_port(struct port *port, int lacp_fast) { static const struct port_params tmpl = { .system_priority = 0xffff, .key = 1, .port_number = 1, .port_priority = 0xff, .port_state = 1, }; static const struct lacpdu lacpdu = { .subtype = 0x01, .version_number = 0x01, .tlv_type_actor_info = 0x01, .actor_information_length = 0x14, .tlv_type_partner_info = 0x02, .partner_information_length = 0x14, .tlv_type_collector_info = 0x03, .collector_information_length = 0x10, .collector_max_delay = htons(AD_COLLECTOR_MAX_DELAY), }; if (port) { port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; port->actor_admin_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; port->actor_oper_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); memcpy(&port->partner_oper, &tmpl, sizeof(tmpl)); port->is_enabled = true; /* private parameters */ port->sm_vars = AD_PORT_BEGIN | AD_PORT_LACP_ENABLED; port->sm_rx_state = 0; port->sm_rx_timer_counter = 0; port->sm_periodic_state = 0; port->sm_periodic_timer_counter = 0; port->sm_mux_state = 0; port->sm_mux_timer_counter = 0; port->sm_tx_state = 0; port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->transaction_id = 0; port->sm_churn_actor_timer_counter = 0; port->sm_churn_actor_state = 0; port->churn_actor_count = 0; port->sm_churn_partner_timer_counter = 0; port->sm_churn_partner_state = 0; port->churn_partner_count = 0; memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu)); } } /** * ad_enable_collecting - enable a port's receive * @port: the port we're looking at * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting(struct port *port) { if (port->aggregator->is_active) { struct slave *slave = port->slave; slave_dbg(slave->bond->dev, slave->dev, "Enabling collecting on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_collecting_port(port); } } /** * ad_disable_distributing - disable a port's transmit * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling distributing on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_distributing_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_enable_collecting_distributing - enable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator->is_active) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Enabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; } } /** * ad_disable_collecting_distributing - disable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_marker_info_received - handle receive of a Marker information frame * @marker_info: Marker info received * @port: the port we're looking at */ static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port) { struct bond_marker marker; atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx); /* copy the received marker data to the response marker */ memcpy(&marker, marker_info, sizeof(struct bond_marker)); /* change the marker subtype to marker response */ marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE; /* send the marker response */ if (ad_marker_send(port, &marker) >= 0) slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent Marker Response on port %d\n", port->actor_port_number); } /** * ad_marker_response_received - handle receive of a marker response frame * @marker: marker PDU received * @port: the port we're looking at * * This function does nothing since we decided not to implement send and handle * response for marker PDU's, in this stage, but only to respond to marker * information. */ static void ad_marker_response_received(struct bond_marker *marker, struct port *port) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx); /* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */ } /* ========= AD exported functions to the main bonding code ========= */ /* Check aggregators status in team every T seconds */ #define AD_AGGREGATOR_SELECTION_TIMER 8 /** * bond_3ad_initiate_agg_selection - initate aggregator selection * @bond: bonding struct * @timeout: timeout value to set * * Set the aggregation selection timer, to initiate an agg selection in * the very near future. Called during first initialization, and during * any down to up transitions of the bond. */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on * * Can be called only after the mac address of the bond is set. */ void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); } /** * bond_3ad_bind_slave - initialize a slave's port * @slave: slave struct to work on * * Returns: 0 on success * < 0 on error */ void bond_3ad_bind_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); struct port *port; struct aggregator *aggregator; /* check that the slave has not been initialized yet. */ if (SLAVE_AD_INFO(slave)->port.slave != slave) { /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); ad_initialize_port(port, bond->params.lacp_fast); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and * user key */ port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; __disable_port(port); /* aggregator initialization */ aggregator = &(SLAVE_AD_INFO(slave)->aggregator); ad_initialize_agg(aggregator); aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; aggregator->slave = slave; aggregator->is_active = 0; aggregator->num_of_ports = 0; } } /** * bond_3ad_unbind_slave - deinitialize a slave's port * @slave: slave struct to work on * * Search for the aggregator that is related to this port, remove the * aggregator and assign another aggregator for other port related to it * (if any), and remove the port. */ void bond_3ad_unbind_slave(struct slave *slave) { struct port *port, *prev_port, *temp_port; struct aggregator *aggregator, *new_aggregator, *temp_aggregator; int select_new_active_agg = 0; struct bonding *bond = slave->bond; struct slave *slave_iter; struct list_head *iter; bool dummy_slave_update; /* Ignore this value as caller updates array */ /* Sync against bond_3ad_state_machine_handler() */ spin_lock_bh(&bond->mode_lock); aggregator = &(SLAVE_AD_INFO(slave)->aggregator); port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(bond->dev, slave->dev, "Trying to unbind an uninitialized port\n"); goto out; } slave_dbg(bond->dev, slave->dev, "Unbinding Link Aggregation Group %d\n", aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); /* check if this aggregator is occupied */ if (aggregator->lag_ports) { /* check if there are other ports related to this aggregator * except the port related to this slave(thats ensure us that * there is a reason to search for new aggregator, and that we * will find one */ if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { /* find new aggregator for the related port(s) */ bond_for_each_slave(bond, slave_iter, iter) { new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); /* if the new aggregator is empty, or it is * connected to our port only */ if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) break; } if (!slave_iter) new_aggregator = NULL; /* if new aggregator found, copy the aggregator's * parameters and connect the related lag_ports to the * new aggregator */ if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { slave_dbg(bond->dev, slave->dev, "Some port(s) related to LAG %d - replacing with LAG %d\n", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier); if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); select_new_active_agg = 1; } new_aggregator->is_individual = aggregator->is_individual; new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; new_aggregator->partner_system = aggregator->partner_system; new_aggregator->partner_system_priority = aggregator->partner_system_priority; new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; new_aggregator->receive_state = aggregator->receive_state; new_aggregator->transmit_state = aggregator->transmit_state; new_aggregator->lag_ports = aggregator->lag_ports; new_aggregator->is_active = aggregator->is_active; new_aggregator->num_of_ports = aggregator->num_of_ports; /* update the information that is written on * the ports about the aggregator */ for (temp_port = aggregator->lag_ports; temp_port; temp_port = temp_port->next_port_in_aggregator) { temp_port->aggregator = new_aggregator; temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; } ad_clear_agg(aggregator); if (select_new_active_agg) ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } else { slave_warn(bond->dev, slave->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); } } else { /* in case that the only port related to this * aggregator is the one we want to remove */ select_new_active_agg = aggregator->is_active; ad_clear_agg(aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ temp_aggregator = __get_first_agg(port); if (temp_aggregator) ad_agg_selection_logic(temp_aggregator, &dummy_slave_update); } } } slave_dbg(bond->dev, slave->dev, "Unbinding port %d\n", port->actor_port_number); /* find the aggregator that this port is connected to */ bond_for_each_slave(bond, slave_iter, iter) { temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); prev_port = NULL; /* search the port in the aggregator's related ports */ for (temp_port = temp_aggregator->lag_ports; temp_port; prev_port = temp_port, temp_port = temp_port->next_port_in_aggregator) { if (temp_port == port) { /* the aggregator found - detach the port from * this aggregator */ if (prev_port) prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; else temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; if (temp_aggregator->num_of_ports == 0) ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } } break; } } } port->slave = NULL; out: spin_unlock_bh(&bond->mode_lock); } /** * bond_3ad_update_ad_actor_settings - reflect change of actor settings to ports * @bond: bonding struct to work on * * If an ad_actor setting gets changed we need to update the individual port * settings so the bond device will use the new values when it gets upped. */ void bond_3ad_update_ad_actor_settings(struct bonding *bond) { struct list_head *iter; struct slave *slave; ASSERT_RTNL(); BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { struct port *port = &(SLAVE_AD_INFO(slave))->port; __ad_actor_update_port(port); port->ntt = true; } spin_unlock_bh(&bond->mode_lock); } /** * bond_agg_timer_advance - advance agg_select_timer * @bond: bonding structure * * Return true when agg_select_timer reaches 0. */ static bool bond_agg_timer_advance(struct bonding *bond) { int val, nval; while (1) { val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); if (!val) return false; nval = val - 1; if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, val, nval) == val) break; } return nval == 0; } /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from * * The state machine handling concept in this module is to check every tick * which state machine should operate any function. The execution order is * round robin, so when we have an interaction between state machines, the * reply of one to each other might be delayed until next tick. * * This function also complete the initialization when the agg_select_timer * times out, and it selects an aggregator for the ports that are yet not * related to any aggregator, and selects the active aggregator for a bond. */ void bond_3ad_state_machine_handler(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, ad_work.work); struct aggregator *aggregator; struct list_head *iter; struct slave *slave; struct port *port; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; bool update_slave_arr = false; /* Lock to protect data accessed by all (e.g., port->sm_vars) and * against running with bond_3ad_unbind_slave. ad_rx_machine may run * concurrently due to incoming LACPDU as well. */ spin_lock_bh(&bond->mode_lock); rcu_read_lock(); /* check if there are any slaves */ if (!bond_has_slaves(bond)) goto re_arm; if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; /* select the active aggregator for the bond */ if (port) { if (!port->slave) { net_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", bond->dev->name); goto re_arm; } aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, &update_slave_arr); } bond_3ad_set_carrier(bond); } /* for each port run the state machines */ bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: Found an uninitialized port\n", bond->dev->name); goto re_arm; } ad_rx_machine(NULL, port); ad_periodic_machine(port, &bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); ad_churn_machine(port); /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; } re_arm: bond_for_each_slave_rcu(bond, slave, iter) { if (slave->should_notify) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } } rcu_read_unlock(); spin_unlock_bh(&bond->mode_lock); if (update_slave_arr) bond_slave_arr_work_rearm(bond, 0); if (should_notify_rtnl && rtnl_trylock()) { bond_slave_state_notify(bond); rtnl_unlock(); } queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } /** * bond_3ad_rx_indication - handle a received frame * @lacpdu: received lacpdu * @slave: slave struct to work on * * It is assumed that frames that were sent on this NIC don't returned as new * received frames (loopback). Since only the payload is given to this * function, it check for loopback. */ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave) { struct bonding *bond = slave->bond; int ret = RX_HANDLER_ANOTHER; struct bond_marker *marker; struct port *port; atomic64_t *stat; port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", slave->dev->name, slave->bond->dev->name); return ret; } switch (lacpdu->subtype) { case AD_TYPE_LACPDU: ret = RX_HANDLER_CONSUMED; slave_dbg(slave->bond->dev, slave->dev, "Received LACPDU on port %d\n", port->actor_port_number); /* Protect against concurrent state machines */ spin_lock(&slave->bond->mode_lock); ad_rx_machine(lacpdu, port); spin_unlock(&slave->bond->mode_lock); break; case AD_TYPE_MARKER: ret = RX_HANDLER_CONSUMED; /* No need to convert fields to Little Endian since we * don't use the marker's fields. */ marker = (struct bond_marker *)lacpdu; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Information on port %d\n", port->actor_port_number); ad_marker_info_received(marker, port); break; case AD_MARKER_RESPONSE_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Response on port %d\n", port->actor_port_number); ad_marker_response_received(marker, port); break; default: slave_dbg(slave->bond->dev, slave->dev, "Received an unknown Marker subtype on port %d\n", port->actor_port_number); stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx; atomic64_inc(stat); stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx; atomic64_inc(stat); } break; default: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx); } return ret; } /** * ad_update_actor_keys - Update the oper / admin keys for a port based on * its current speed and duplex settings. * * @port: the port we'are looking at * @reset: Boolean to just reset the speed and the duplex part of the key * * The logic to change the oper / admin keys is: * (a) A full duplex port can participate in LACP with partner. * (b) When the speed is changed, LACP need to be reinitiated. */ static void ad_update_actor_keys(struct port *port, bool reset) { u8 duplex = 0; u16 ospeed = 0, speed = 0; u16 old_oper_key = port->actor_oper_port_key; port->actor_admin_port_key &= ~(AD_SPEED_KEY_MASKS|AD_DUPLEX_KEY_MASKS); if (!reset) { speed = __get_link_speed(port); ospeed = (old_oper_key & AD_SPEED_KEY_MASKS) >> 1; duplex = __get_duplex(port); port->actor_admin_port_key |= (speed << 1) | duplex; } port->actor_oper_port_key = port->actor_admin_port_key; if (old_oper_key != port->actor_oper_port_key) { /* Only 'duplex' port participates in LACP */ if (duplex) port->sm_vars |= AD_PORT_LACP_ENABLED; else port->sm_vars &= ~AD_PORT_LACP_ENABLED; if (!reset) { if (!speed) { slave_err(port->slave->bond->dev, port->slave->dev, "speed changed to 0 on port %d\n", port->actor_port_number); } else if (duplex && ospeed != speed) { /* Speed change restarts LACP state-machine */ port->sm_vars |= AD_PORT_BEGIN; } } } } /** * bond_3ad_adapter_speed_duplex_changed - handle a slave's speed / duplex * change indication * * @slave: slave struct to work on * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) { struct port *port; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "speed/duplex changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); ad_update_actor_keys(port, false); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed speed/duplex\n", port->actor_port_number); } /** * bond_3ad_handle_link_change - handle a slave's link status change indication * @slave: slave struct to work on * @link: whether the link is now up or down * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_handle_link_change(struct slave *slave, char link) { struct aggregator *agg; struct port *port; bool dummy; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "link status changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); /* on link down we are zeroing duplex and speed since * some of the adaptors(ce1000.lan) report full duplex/speed * instead of N/A(duplex) / 0(speed). * * on link up we are forcing recheck on the duplex and speed since * some of he adaptors(ce1000.lan) report. */ if (link == BOND_LINK_UP) { port->is_enabled = true; ad_update_actor_keys(port, false); } else { /* link has failed */ port->is_enabled = false; ad_update_actor_keys(port, true); } agg = __get_first_agg(port); ad_agg_selection_logic(agg, &dummy); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); /* RTNL is held and mode_lock is released so it's safe * to update slave_array here. */ bond_update_slave_arr(slave->bond, NULL); } /** * bond_3ad_set_carrier - set link state for bonding master * @bond: bonding structure * * if we have an active aggregator, we're up, if not, we're down. * Presumes that we cannot have an active aggregator if there are * no slaves with link up. * * This behavior complies with IEEE 802.3 section 43.3.9. * * Called by bond_set_carrier(). Return zero if carrier state does not * change, nonzero if it does. */ int bond_3ad_set_carrier(struct bonding *bond) { struct aggregator *active; struct slave *first_slave; int ret = 1; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); if (!first_slave) { ret = 0; goto out; } active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); if (active) { /* are enough slaves available to consider link up? */ if (__agg_active_ports(active) < bond->params.min_links) { if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); goto out; } } else if (!netif_carrier_ok(bond->dev)) { netif_carrier_on(bond->dev); goto out; } } else if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); } out: rcu_read_unlock(); return ret; } /** * __bond_3ad_get_active_agg_info - get information of the active aggregator * @bond: bonding struct to work on * @ad_info: ad_info struct to fill with the bond's info * * Returns: 0 on success * < 0 on error */ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { struct aggregator *aggregator = NULL; struct list_head *iter; struct slave *slave; struct port *port; bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; break; } } if (!aggregator) return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, aggregator->partner_system.mac_addr_value); return 0; } int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); rcu_read_unlock(); return ret; } int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct lacpdu *lacpdu, _lacpdu; if (skb->protocol != PKT_TYPE_LACPDU) return RX_HANDLER_ANOTHER; if (!MAC_ADDRESS_EQUAL(eth_hdr(skb)->h_dest, lacpdu_mcast_addr)) return RX_HANDLER_ANOTHER; lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); if (!lacpdu) { atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx); return RX_HANDLER_ANOTHER; } return bond_3ad_rx_indication(lacpdu, slave); } /** * bond_3ad_update_lacp_rate - change the lacp rate * @bond: bonding struct * * When modify lacp_rate parameter via sysfs, * update actor_oper_port_state of each port. * * Hold bond->mode_lock, * so we can modify port->actor_oper_port_state, * no matter bond is up or down. */ void bond_3ad_update_lacp_rate(struct bonding *bond) { struct port *port = NULL; struct list_head *iter; struct slave *slave; int lacp_fast; lacp_fast = bond->params.lacp_fast; spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; else port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT; } spin_unlock_bh(&bond->mode_lock); } size_t bond_3ad_stats_size(void) { return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */ nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */ } int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats) { u64 val; val = atomic64_read(&stats->lacpdu_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_illegal_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; return 0; }
7 6 3 3 3 3 3 8 8 3 2 1 3 2 2 2 5 5 4 4 3 4 3 3 2 2 1 6 6 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for assembler optimized version of Camellia * * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * Camellia parts based on code by: * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) */ #include <linux/unaligned.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> #include "camellia.h" #include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk); asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); } /* camellia sboxes */ __visible const u64 camellia_sp10011110[256] = { 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, 0x9e00009e9e9e9e00ULL, }; __visible const u64 camellia_sp22000222[256] = { 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, 0x3d3d0000003d3d3dULL, }; __visible const u64 camellia_sp03303033[256] = { 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, 0x004f4f004f004f4fULL, }; __visible const u64 camellia_sp00444404[256] = { 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, 0x00009e9e9e9e009eULL, }; __visible const u64 camellia_sp02220222[256] = { 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, 0x003d3d3d003d3d3dULL, }; __visible const u64 camellia_sp30333033[256] = { 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, 0x4f004f4f4f004f4fULL, }; __visible const u64 camellia_sp44044404[256] = { 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, 0x9e9e009e9e9e009eULL, }; __visible const u64 camellia_sp11101110[256] = { 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, 0x9e9e9e009e9e9e00ULL, }; /* key constants */ #define CAMELLIA_SIGMA1L (0xA09E667FL) #define CAMELLIA_SIGMA1R (0x3BCC908BL) #define CAMELLIA_SIGMA2L (0xB67AE858L) #define CAMELLIA_SIGMA2R (0x4CAA73B2L) #define CAMELLIA_SIGMA3L (0xC6EF372FL) #define CAMELLIA_SIGMA3R (0xE94F82BEL) #define CAMELLIA_SIGMA4L (0x54FF53A5L) #define CAMELLIA_SIGMA4R (0xF1D36F1CL) #define CAMELLIA_SIGMA5L (0x10E527FAL) #define CAMELLIA_SIGMA5R (0xDE682D1DL) #define CAMELLIA_SIGMA6L (0xB05688C2L) #define CAMELLIA_SIGMA6R (0xB3E6C1FDL) /* macros */ #define ROLDQ(l, r, bits) ({ \ u64 t = l; \ l = (l << bits) | (r >> (64 - bits)); \ r = (r << bits) | (t >> (64 - bits)); \ }) #define CAMELLIA_F(x, kl, kr, y) ({ \ u64 ii = x ^ (((u64)kl << 32) | kr); \ y = camellia_sp11101110[(uint8_t)ii]; \ y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp30333033[(uint8_t)ii]; \ y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp00444404[(uint8_t)ii]; \ y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp22000222[(uint8_t)ii]; \ y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ y = ror64(y, 32); \ }) #define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max) { u64 kw4, tt; u32 dw, tl, tr; /* absorb kw2 to other subkeys */ /* round 2 */ subRL[3] ^= subRL[1]; /* round 4 */ subRL[5] ^= subRL[1]; /* round 6 */ subRL[7] ^= subRL[1]; subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; /* modified for FLinv(kl2) */ dw = (subRL[1] & subRL[9]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 8 */ subRL[11] ^= subRL[1]; /* round 10 */ subRL[13] ^= subRL[1]; /* round 12 */ subRL[15] ^= subRL[1]; subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; /* modified for FLinv(kl4) */ dw = (subRL[1] & subRL[17]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 14 */ subRL[19] ^= subRL[1]; /* round 16 */ subRL[21] ^= subRL[1]; /* round 18 */ subRL[23] ^= subRL[1]; if (max == 24) { /* kw3 */ subRL[24] ^= subRL[1]; /* absorb kw4 to other subkeys */ kw4 = subRL[25]; } else { subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; /* modified for FLinv(kl6) */ dw = (subRL[1] & subRL[25]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 20 */ subRL[27] ^= subRL[1]; /* round 22 */ subRL[29] ^= subRL[1]; /* round 24 */ subRL[31] ^= subRL[1]; /* kw3 */ subRL[32] ^= subRL[1]; /* absorb kw4 to other subkeys */ kw4 = subRL[33]; /* round 23 */ subRL[30] ^= kw4; /* round 21 */ subRL[28] ^= kw4; /* round 19 */ subRL[26] ^= kw4; kw4 ^= (kw4 & ~subRL[24]) << 32; /* modified for FL(kl5) */ dw = (kw4 & subRL[24]) >> 32; kw4 ^= rol32(dw, 1); } /* round 17 */ subRL[22] ^= kw4; /* round 15 */ subRL[20] ^= kw4; /* round 13 */ subRL[18] ^= kw4; kw4 ^= (kw4 & ~subRL[16]) << 32; /* modified for FL(kl3) */ dw = (kw4 & subRL[16]) >> 32; kw4 ^= rol32(dw, 1); /* round 11 */ subRL[14] ^= kw4; /* round 9 */ subRL[12] ^= kw4; /* round 7 */ subRL[10] ^= kw4; kw4 ^= (kw4 & ~subRL[8]) << 32; /* modified for FL(kl1) */ dw = (kw4 & subRL[8]) >> 32; kw4 ^= rol32(dw, 1); /* round 5 */ subRL[6] ^= kw4; /* round 3 */ subRL[4] ^= kw4; /* round 1 */ subRL[2] ^= kw4; /* kw1 */ subRL[0] ^= kw4; /* key XOR is end of F-function */ SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); dw = tl & (subRL[8] >> 32); /* FL(kl1) */ tr = subRL[10] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); dw = tl & (subRL[9] >> 32); /* FLinv(kl2) */ tr = subRL[7] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); dw = tl & (subRL[16] >> 32); /* FL(kl3) */ tr = subRL[18] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); dw = tl & (subRL[17] >> 32); /* FLinv(kl4) */ tr = subRL[15] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ if (max == 24) { SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ } else { tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); dw = tl & (subRL[24] >> 32); /* FL(kl5) */ tr = subRL[26] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); dw = tl & (subRL[25] >> 32); /* FLinv(kl6) */ tr = subRL[23] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ } } static void camellia_setup128(const unsigned char *key, u64 *subkey) { u64 kl, kr, ww; u64 subRL[26]; /** * k == kl || kr (|| is concatenation) */ kl = get_unaligned_be64(key); kr = get_unaligned_be64(key + 8); /* generate KL dependent subkeys */ /* kw1 */ subRL[0] = kl; /* kw2 */ subRL[1] = kr; /* rotation left shift 15bit */ ROLDQ(kl, kr, 15); /* k3 */ subRL[4] = kl; /* k4 */ subRL[5] = kr; /* rotation left shift 15+30bit */ ROLDQ(kl, kr, 30); /* k7 */ subRL[10] = kl; /* k8 */ subRL[11] = kr; /* rotation left shift 15+30+15bit */ ROLDQ(kl, kr, 15); /* k10 */ subRL[13] = kr; /* rotation left shift 15+30+15+17 bit */ ROLDQ(kl, kr, 17); /* kl3 */ subRL[16] = kl; /* kl4 */ subRL[17] = kr; /* rotation left shift 15+30+15+17+17 bit */ ROLDQ(kl, kr, 17); /* k13 */ subRL[18] = kl; /* k14 */ subRL[19] = kr; /* rotation left shift 15+30+15+17+17+17 bit */ ROLDQ(kl, kr, 17); /* k17 */ subRL[22] = kl; /* k18 */ subRL[23] = kr; /* generate KA */ kl = subRL[0]; kr = subRL[1]; CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); /* current status == (kll, klr, w0, w1) */ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); kl ^= ww; /* generate KA dependent subkeys */ /* k1, k2 */ subRL[2] = kl; subRL[3] = kr; ROLDQ(kl, kr, 15); /* k5,k6 */ subRL[6] = kl; subRL[7] = kr; ROLDQ(kl, kr, 15); /* kl1, kl2 */ subRL[8] = kl; subRL[9] = kr; ROLDQ(kl, kr, 15); /* k9 */ subRL[12] = kl; ROLDQ(kl, kr, 15); /* k11, k12 */ subRL[14] = kl; subRL[15] = kr; ROLDQ(kl, kr, 34); /* k15, k16 */ subRL[20] = kl; subRL[21] = kr; ROLDQ(kl, kr, 17); /* kw3, kw4 */ subRL[24] = kl; subRL[25] = kr; camellia_setup_tail(subkey, subRL, 24); } static void camellia_setup256(const unsigned char *key, u64 *subkey) { u64 kl, kr; /* left half of key */ u64 krl, krr; /* right half of key */ u64 ww; /* temporary variables */ u64 subRL[34]; /** * key = (kl || kr || krl || krr) (|| is concatenation) */ kl = get_unaligned_be64(key); kr = get_unaligned_be64(key + 8); krl = get_unaligned_be64(key + 16); krr = get_unaligned_be64(key + 24); /* generate KL dependent subkeys */ /* kw1 */ subRL[0] = kl; /* kw2 */ subRL[1] = kr; ROLDQ(kl, kr, 45); /* k9 */ subRL[12] = kl; /* k10 */ subRL[13] = kr; ROLDQ(kl, kr, 15); /* kl3 */ subRL[16] = kl; /* kl4 */ subRL[17] = kr; ROLDQ(kl, kr, 17); /* k17 */ subRL[22] = kl; /* k18 */ subRL[23] = kr; ROLDQ(kl, kr, 34); /* k23 */ subRL[30] = kl; /* k24 */ subRL[31] = kr; /* generate KR dependent subkeys */ ROLDQ(krl, krr, 15); /* k3 */ subRL[4] = krl; /* k4 */ subRL[5] = krr; ROLDQ(krl, krr, 15); /* kl1 */ subRL[8] = krl; /* kl2 */ subRL[9] = krr; ROLDQ(krl, krr, 30); /* k13 */ subRL[18] = krl; /* k14 */ subRL[19] = krr; ROLDQ(krl, krr, 34); /* k19 */ subRL[26] = krl; /* k20 */ subRL[27] = krr; ROLDQ(krl, krr, 34); /* generate KA */ kl = subRL[0] ^ krl; kr = subRL[1] ^ krr; CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); kl ^= krl; CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); kr ^= ww ^ krr; CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); kl ^= ww; /* generate KB */ krl ^= kl; krr ^= kr; CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); krr ^= ww; CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); krl ^= ww; /* generate KA dependent subkeys */ ROLDQ(kl, kr, 15); /* k5 */ subRL[6] = kl; /* k6 */ subRL[7] = kr; ROLDQ(kl, kr, 30); /* k11 */ subRL[14] = kl; /* k12 */ subRL[15] = kr; /* rotation left shift 32bit */ ROLDQ(kl, kr, 32); /* kl5 */ subRL[24] = kl; /* kl6 */ subRL[25] = kr; /* rotation left shift 17 from k11,k12 -> k21,k22 */ ROLDQ(kl, kr, 17); /* k21 */ subRL[28] = kl; /* k22 */ subRL[29] = kr; /* generate KB dependent subkeys */ /* k1 */ subRL[2] = krl; /* k2 */ subRL[3] = krr; ROLDQ(krl, krr, 30); /* k7 */ subRL[10] = krl; /* k8 */ subRL[11] = krr; ROLDQ(krl, krr, 30); /* k15 */ subRL[20] = krl; /* k16 */ subRL[21] = krr; ROLDQ(krl, krr, 51); /* kw3 */ subRL[32] = krl; /* kw4 */ subRL[33] = krr; camellia_setup_tail(subkey, subRL, 32); } static void camellia_setup192(const unsigned char *key, u64 *subkey) { unsigned char kk[32]; u64 krl, krr; memcpy(kk, key, 24); memcpy((unsigned char *)&krl, key+16, 8); krr = ~krl; memcpy(kk+24, (unsigned char *)&krr, 8); camellia_setup256(kk, subkey); } int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len) { if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; cctx->key_length = key_len; switch (key_len) { case 16: camellia_setup128(key, cctx->key_table); break; case 24: camellia_setup192(key, cctx->key_table); break; case 32: camellia_setup256(key, cctx->key_table); break; } return 0; } EXPORT_SYMBOL_GPL(__camellia_setkey); static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len); } static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { return camellia_setkey(&tfm->base, key, key_len); } void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src) { u8 buf[CAMELLIA_BLOCK_SIZE]; const u8 *iv = src; if (dst == src) iv = memcpy(buf, iv, sizeof(buf)); camellia_dec_blk_2way(ctx, dst, src); crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE); } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); ECB_BLOCK(2, camellia_enc_blk_2way); ECB_BLOCK(1, camellia_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); ECB_BLOCK(2, camellia_dec_blk_2way); ECB_BLOCK(1, camellia_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); CBC_ENC_BLOCK(camellia_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); CBC_DEC_BLOCK(1, camellia_dec_blk); CBC_WALK_END(); } static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct camellia_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, .cia_setkey = camellia_setkey, .cia_encrypt = camellia_encrypt, .cia_decrypt = camellia_decrypt } } }; static struct skcipher_alg camellia_skcipher_algs[] = { { .base.cra_name = "ecb(camellia)", .base.cra_driver_name = "ecb-camellia-asm", .base.cra_priority = 300, .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct camellia_ctx), .base.cra_module = THIS_MODULE, .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, .setkey = camellia_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(camellia)", .base.cra_driver_name = "cbc-camellia-asm", .base.cra_priority = 300, .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct camellia_ctx), .base.cra_module = THIS_MODULE, .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, .ivsize = CAMELLIA_BLOCK_SIZE, .setkey = camellia_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, } }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, camellia-asm is slower than original assembler * implementation because excessive uses of 64bit rotate and * left-shifts (which are really slow on P4) needed to store and * handle 128bit block in two 64bit registers. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init camellia_init(void) { int err; if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "camellia-x86_64: performance on this CPU " "would be suboptimal: disabling " "camellia-x86_64.\n"); return -ENODEV; } err = crypto_register_alg(&camellia_cipher_alg); if (err) return err; err = crypto_register_skciphers(camellia_skcipher_algs, ARRAY_SIZE(camellia_skcipher_algs)); if (err) crypto_unregister_alg(&camellia_cipher_alg); return err; } static void __exit camellia_fini(void) { crypto_unregister_alg(&camellia_cipher_alg); crypto_unregister_skciphers(camellia_skcipher_algs, ARRAY_SIZE(camellia_skcipher_algs)); } module_init(camellia_init); module_exit(camellia_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); MODULE_ALIAS_CRYPTO("camellia"); MODULE_ALIAS_CRYPTO("camellia-asm");
8 8 8 8 1 1 7 7 7 33 25 32 12 12 12 32 32 32 33 24 9 4 30 58 58 58 15 3 16 34 3 5 2 3 2 3 2 1 1 1 10 10 9 1 1 9 9 1 1 1 1 1 9 9 9 7 7 1 6 7 7 1 1 1 1 1 98 98 97 98 98 98 84 14 93 6 98 2 3 4 4 4 3 2 2 4 4 3 109 109 1 11 98 87 68 83 4 3 3 14 6 8 2 4 2 3 3 1 2 1 1 5 1 40 3 34 14 9 9 2 3 21 17 5 1 3 1 1 1 43 5 21 17 3 59 54 2 4 3 8 9 97 86 86 86 37 32 6 1 31 3 22 21 6 33 27 7 33 7 1 24 1 25 2 7 3 1 2 2 2 115 184 1 1 1 1 1 1 174 1 1 1 2 1 1 56 56 56 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Changes: * Roger Venning <r.venning@telstra.com>: 6to4 support * Nate Thompson <nate@thebog.net>: 6to4 support * Fred Templin <fred.l.templin@boeing.com>: isatap support */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmp.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> #include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/inet_dscp.h> /* This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c For comments look at net/ipv4/ip_gre.c --ANK */ #define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; static unsigned int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; struct net_device *fb_tunnel_dev; }; static inline struct sit_net *dev_to_sit_net(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); return net_generic(t->net, sit_net_id); } /* * Must be invoked with rcu_read_lock */ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local, int sifindex) { unsigned int h0 = HASH(remote); unsigned int h1 = HASH(local); struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); int ifindex = dev ? dev->ifindex : 0; for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && (!dev || !t->parms.link || ifindex == t->parms.link || sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } t = rcu_dereference(sitn->tunnels_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } static struct ip_tunnel __rcu ** __ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; unsigned int h = 0; int prio = 0; if (remote) { prio |= 2; h ^= HASH(remote); } if (local) { prio |= 1; h ^= HASH(local); } return &sitn->tunnels[prio][h]; } static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn, struct ip_tunnel *t) { return __ipip6_bucket(sitn, &t->parms); } static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp; struct ip_tunnel *iter; for (tp = ipip6_bucket(sitn, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; t->ip6rd.relay_prefixlen = 0; } else { struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); } #endif } static int ipip6_tunnel_create(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); int err; __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags)) dev->priv_flags |= IFF_ISATAP; dev->rtnl_link_ops = &sit_link_ops; err = register_netdevice(dev); if (err < 0) goto out; ipip6_tunnel_clone_6rd(dev, sitn); ipip6_tunnel_link(sitn, t); return 0; out: return err; } static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, struct ip_tunnel_parm_kern *parms, int create) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; struct ip_tunnel *t, *nt; struct ip_tunnel __rcu **tp; struct net_device *dev; char name[IFNAMSIZ]; struct sit_net *sitn = net_generic(net, sit_net_id); for (tp = __ipip6_bucket(sitn, parms); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && parms->link == t->parms.link) { if (create) return NULL; else return t; } } if (!create) goto failed; if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; strscpy(name, parms->name, IFNAMSIZ); } else { strcpy(name, "sit%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!dev) return NULL; dev_net_set(dev, net); nt = netdev_priv(dev); nt->parms = *parms; if (ipip6_tunnel_create(dev) < 0) goto failed_free; if (!parms->name[0]) strcpy(parms->name, dev->name); return nt; failed_free: free_netdev(dev); failed: return NULL; } #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ prl = rcu_dereference(prl->next)) static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { struct ip_tunnel_prl_entry *prl; for_each_prl_rcu(t->prl) if (prl->addr == addr) break; return prl; } static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for * non-root users. * For root users, retry allocating enough memory for * the answer. */ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN); if (!kp) { ret = -ENOMEM; goto out; } } rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; if (kprl.addr != htonl(INADDR_ANY)) break; } rcu_read_unlock(); len = sizeof(*kp) * c; ret = 0; if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) ret = -EFAULT; kfree(kp); out: return ret; } static int ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) { struct ip_tunnel_prl_entry *p; int err = 0; if (a->addr == htonl(INADDR_ANY)) return -EINVAL; ASSERT_RTNL(); for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) { if (p->addr == a->addr) { if (chg) { p->flags = a->flags; goto out; } err = -EEXIST; goto out; } } if (chg) { err = -ENXIO; goto out; } p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); if (!p) { err = -ENOBUFS; goto out; } p->next = t->prl; p->addr = a->addr; p->flags = a->flags; t->prl_count++; rcu_assign_pointer(t->prl, p); out: return err; } static void prl_list_destroy_rcu(struct rcu_head *head) { struct ip_tunnel_prl_entry *p, *n; p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); do { n = rcu_dereference_protected(p->next, 1); kfree(p); p = n; } while (p); } static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { struct ip_tunnel_prl_entry *x; struct ip_tunnel_prl_entry __rcu **p; int err = 0; ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; (x = rtnl_dereference(*p)) != NULL; p = &x->next) { if (x->addr == a->addr) { *p = x->next; kfree_rcu(x, rcu_head); t->prl_count--; goto out; } } err = -ENXIO; } else { x = rtnl_dereference(t->prl); if (x) { t->prl_count = 0; call_rcu(&x->rcu_head, prl_list_destroy_rcu); t->prl = NULL; } } out: return err; } static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ip_tunnel_prl __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl prl; int err; if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; if (copy_from_user(&prl, data, sizeof(prl))) return -EFAULT; switch (cmd) { case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; case SIOCADDPRL: case SIOCCHGPRL: err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } dst_cache_reset(&t->dst_cache); netdev_state_change(dev); return err; } static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { struct ip_tunnel_prl_entry *p; int ok = 1; rcu_read_lock(); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { if (p->flags & PRL_DEFAULT) skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; else skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; } else { const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; if (ipv6_addr_is_isatap(addr6) && (addr6->s6_addr32[3] == iph->saddr) && ipv6_chk_prefix(addr6, t->dev)) skb->ndisc_nodetype = NDISC_NODETYPE_HOST; else ok = 0; } rcu_read_unlock(); return ok; } static void ipip6_tunnel_uninit(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); netdev_put(dev, &tunnel->dev_tracker); } static int ipip6_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; unsigned int data_len = 0; struct ip_tunnel *t; int sifindex; int err; switch (type) { default: case ICMP_PARAMETERPROB: return 0; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: /* Impossible event. */ return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, I believe they are just ether pollution. --ANK */ break; } break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: break; } err = -ENOENT; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, iph->saddr, sifindex); if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, t->parms.link, iph->protocol); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, iph->protocol); err = 0; goto out; } err = 0; if (__in6_dev_get(skb->dev) && !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; out: return err; } static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, const struct in6_addr *v6addr) { __be32 v4embed = 0; if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed) return true; return false; } /* Checks if an address matches an address on the tunnel interface. * Used to detect the NAT of proto 41 packets and let them pass spoofing test. * Long story: * This function is called after we considered the packet as spoofed * in is_spoofed_6rd. * We may have a router that is doing NAT for proto 41 packets * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd * function will return true, dropping the packet. * But, we can still check if is spoofed against the IP * addresses associated with the interface. */ static bool only_dnatted(const struct ip_tunnel *tunnel, const struct in6_addr *v6dst) { int prefix_len; #ifdef CONFIG_IPV6_SIT_6RD prefix_len = tunnel->ip6rd.prefixlen + 32 - tunnel->ip6rd.relay_prefixlen; #else prefix_len = 48; #endif return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); } /* Returns true if a packet is spoofed */ static bool packet_is_spoofed(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *tunnel) { const struct ipv6hdr *ipv6h; if (tunnel->dev->priv_flags & IFF_ISATAP) { if (!isatap_chksrc(skb, iph, tunnel)) return true; return false; } if (tunnel->dev->flags & IFF_POINTOPOINT) return false; ipv6h = ipv6_hdr(skb); if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", &iph->saddr, &ipv6h->saddr, &iph->daddr, &ipv6h->daddr); return true; } if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) return false; if (only_dnatted(tunnel, &ipv6h->daddr)) return false; net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", &iph->saddr, &ipv6h->saddr, &iph->daddr, &ipv6h->daddr); return true; } static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; int sifindex; int err; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr, sifindex); if (tunnel) { if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) goto out; skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { DEV_STATS_INC(tunnel->dev, rx_errors); goto out; } if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; /* skb can be uncloned in iptunnel_pull_header, so * old iph is no longer valid */ iph = (const struct iphdr *)skb_mac_header(skb); skb_reset_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", &iph->saddr, iph->tos); if (err > 1) { DEV_STATS_INC(tunnel->dev, rx_frame_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto out; } } dev_sw_netstats_rx_add(tunnel->dev, skb->len); netif_rx(skb); return 0; } /* no tunnel matched, let upstream know, ipsec may handle it */ return 1; out: kfree_skb(skb); return 0; } static const struct tnl_ptk_info ipip_tpi = { /* no tunnel info required for ipip. */ .proto = htons(ETH_P_IP), }; #if IS_ENABLED(CONFIG_MPLS) static const struct tnl_ptk_info mplsip_tpi = { /* no tunnel info required for mplsip. */ .proto = htons(ETH_P_MPLS_UC), }; #endif static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { const struct iphdr *iph; struct ip_tunnel *tunnel; int sifindex; sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr, sifindex); if (tunnel) { const struct tnl_ptk_info *tpi; if (tunnel->parms.iph.protocol != ipproto && tunnel->parms.iph.protocol != 0) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; #if IS_ENABLED(CONFIG_MPLS) if (ipproto == IPPROTO_MPLS) tpi = &mplsip_tpi; else #endif tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; skb_reset_mac_header(skb); return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } return 1; drop: kfree_skb(skb); return 0; } static int ipip_rcv(struct sk_buff *skb) { return sit_tunnel_rcv(skb, IPPROTO_IPIP); } #if IS_ENABLED(CONFIG_MPLS) static int mplsip_rcv(struct sk_buff *skb) { return sit_tunnel_rcv(skb, IPPROTO_MPLS); } #endif /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. */ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst) { #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { unsigned int pbw0, pbi0; int pbi1; u32 d; pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; d = tunnel->ip6rd.relay_prefixlen < 32 ? (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> (32 - pbi1); *v4dst = tunnel->ip6rd.relay_prefix | htonl(d); return true; } #else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ memcpy(v4dst, &v6dst->s6_addr16[1], 4); return true; } #endif return false; } static inline __be32 try_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst) { __be32 dst = 0; check_6rd(tunnel, v6dst, &dst); return dst; } /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. */ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; const struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int addr_type; u8 ttl; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); if (tos == 1) tos = ipv6_get_dsfield(iph6); /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; bool do_tx_error = false; if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if ((addr_type & IPV6_ADDR_UNICAST) && ipv6_addr_is_isatap(addr6)) dst = addr6->s6_addr32[3]; else do_tx_error = true; neigh_release(neigh); if (do_tx_error) goto tx_error; } if (!dst) dst = try_6rd(tunnel, &iph6->daddr); if (!dst) { struct neighbour *neigh = NULL; bool do_tx_error = false; if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } addr6 = (const struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { addr6 = &ipv6_hdr(skb)->daddr; addr_type = ipv6_addr_type(addr6); } if ((addr_type & IPV6_ADDR_COMPATv4) != 0) dst = addr6->s6_addr32[3]; else do_tx_error = true; neigh_release(neigh); if (do_tx_error) goto tx_error; } flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0, sock_net_uid(tunnel->net, NULL)); rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); if (!rt) { rt = ip_route_output_flow(tunnel->net, &fl4, NULL); if (IS_ERR(rt)) { DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); DEV_STATS_INC(dev, collisions); goto tx_error; } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) { ip_rt_put(rt); goto tx_error; } if (df) { mtu = dst_mtu(&rt->dst) - t_hlen; if (mtu < IPV4_MIN_MTU) { DEV_STATS_INC(dev, collisions); ip_rt_put(rt); goto tx_error; } if (mtu < IPV6_MIN_MTU) { mtu = IPV6_MIN_MTU; df = 0; } if (tunnel->parms.iph.daddr) skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } } if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else tunnel->err_count = 0; } /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); dev_kfree_skb(skb); skb = new_skb; iph6 = ipv6_hdr(skb); } ttl = tiph->ttl; if (ttl == 0) ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) { ip_rt_put(rt); goto tx_error; } skb_set_inner_ipproto(skb, IPPROTO_IPV6); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb, struct net_device *dev, u8 ipproto) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, ipproto); ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: kfree_skb(skb); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { if (!pskb_inet_may_pull(skb)) goto tx_err; switch (skb->protocol) { case htons(ETH_P_IP): sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); break; case htons(ETH_P_IPV6): ipip6_tunnel_xmit(skb, dev); break; #if IS_ENABLED(CONFIG_MPLS) case htons(ETH_P_MPLS_UC): sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS); break; #endif default: goto tx_err; } return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return NETDEV_TX_OK; } static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); struct net_device *tdev = NULL; int hlen = LL_MAX_HEADER; const struct iphdr *iph; struct flowi4 fl4; iph = &tunnel->parms.iph; if (iph->daddr) { struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, iph->tos & INET_DSCP_MASK, tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev && !netif_is_l3_master(tdev)) { int mtu; mtu = tdev->mtu - t_hlen; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); hlen = tdev->hard_header_len + tdev->needed_headroom; } dev->needed_headroom = t_hlen + hlen; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm_kern *p, __u32 fwmark) { struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; __dev_addr_set(t->dev, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; t->fwmark = fwmark; ipip6_tunnel_bind_dev(t->dev); } dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); } #ifdef CONFIG_IPV6_SIT_6RD static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, struct ip_tunnel_6rd *ip6rd) { struct in6_addr prefix; __be32 relay_prefix; if (ip6rd->relay_prefixlen > 32 || ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) return -EINVAL; ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) return -EINVAL; if (ip6rd->relay_prefixlen) relay_prefix = ip6rd->relay_prefix & htonl(0xffffffffUL << (32 - ip6rd->relay_prefixlen)); else relay_prefix = 0; if (relay_prefix != ip6rd->relay_prefix) return -EINVAL; t->ip6rd.prefix = prefix; t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; dst_cache_reset(&t->dst_cache); netdev_state_change(t->dev); return 0; } static int ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern p; struct ip_tunnel_6rd ip6rd; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { if (!ip_tunnel_parm_from_user(&p, data)) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } if (!t) t = netdev_priv(dev); ip6rd.prefix = t->ip6rd.prefix; ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; if (copy_to_user(data, &ip6rd, sizeof(ip6rd))) return -EFAULT; return 0; } static int ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; int err; if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ip6rd, data, sizeof(ip6rd))) return -EFAULT; if (cmd != SIOCDEL6RD) { err = ipip6_tunnel_update_6rd(t, &ip6rd); if (err < 0) return err; } else ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); return 0; } #endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || #if IS_ENABLED(CONFIG_MPLS) ipproto == IPPROTO_MPLS || #endif ipproto == 0; } static int __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p) { if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!ipip6_valid_ip_proto(p->iph.protocol)) return -EINVAL; if (p->iph.version != 4 || p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); return 0; } static int ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) t = ipip6_tunnel_locate(t->net, p, 0); if (!t) t = netdev_priv(dev); memcpy(p, &t->parms, sizeof(*p)); return 0; } static int ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; err = __ipip6_tunnel_ioctl_validate(t->net, p); if (err) return err; t = ipip6_tunnel_locate(t->net, p, 1); if (!t) return -ENOBUFS; return 0; } static int ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; err = __ipip6_tunnel_ioctl_validate(t->net, p); if (err) return err; t = ipip6_tunnel_locate(t->net, p, 0); if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { if (!t) return -ENOENT; } else { if (t) { if (t->dev != dev) return -EEXIST; } else { if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) return -EINVAL; t = netdev_priv(dev); } ipip6_tunnel_update(t, p, t->fwmark); } return 0; } static int ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { t = ipip6_tunnel_locate(t->net, p, 0); if (!t) return -ENOENT; if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) return -EPERM; dev = t->dev; } unregister_netdevice(dev); return 0; } static int ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { switch (cmd) { case SIOCGETTUNNEL: return ipip6_tunnel_get(dev, p); case SIOCADDTUNNEL: return ipip6_tunnel_add(dev, p); case SIOCCHGTUNNEL: return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: return ipip6_tunnel_del(dev, p); default: return -EINVAL; } } static int ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { switch (cmd) { case SIOCGETTUNNEL: case SIOCADDTUNNEL: case SIOCCHGTUNNEL: case SIOCDELTUNNEL: return ip_tunnel_siocdevprivate(dev, ifr, data, cmd); case SIOCGETPRL: return ipip6_tunnel_get_prl(dev, data); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: return ipip6_tunnel_prl_ctl(dev, data, cmd); #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: return ipip6_tunnel_get6rd(dev, data); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: return ipip6_tunnel_6rdctl(dev, data, cmd); #endif default: return -EINVAL; } } static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); dst_cache_destroy(&tunnel->dst_cache); } #define SIT_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) static void ipip6_tunnel_setup(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; dev->lltx = true; dev->features |= SIT_FEATURES; dev->hw_features |= SIT_FEATURES; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); int err; tunnel->dev = dev; tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (err) return err; netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; } static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u8 proto; if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); if (!ipip6_valid_ip_proto(proto)) return -EINVAL; return 0; } static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPV6; parms->iph.ihl = 5; parms->iph.ttl = 64; if (!data) return; ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_FWMARK]) *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } #ifdef CONFIG_IPV6_SIT_6RD /* This function returns true when 6RD attributes are present in the nl msg */ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], struct ip_tunnel_6rd *ip6rd) { bool ret = false; memset(ip6rd, 0, sizeof(*ip6rd)); if (!data) return ret; if (data[IFLA_IPTUN_6RD_PREFIX]) { ret = true; ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { ret = true; ip6rd->relay_prefix = nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); } if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { ret = true; ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { ret = true; ip6rd->relay_prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); } return ret; } #endif static int ipip6_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct ip_tunnel *nt; struct ip_tunnel_encap ipencap; #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif int err; nt = netdev_priv(dev); if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(nt, &ipencap); if (err < 0) return err; } ipip6_netlink_parms(data, &nt->parms, &nt->fwmark); if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; err = ipip6_tunnel_create(dev); if (err < 0) return err; if (tb[IFLA_MTU]) { u32 mtu = nla_get_u32(tb[IFLA_MTU]); if (mtu >= IPV6_MIN_MTU && mtu <= IP6_MAX_MTU - dev->hard_header_len) dev->mtu = mtu; } #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); if (err < 0) unregister_netdevice_queue(dev, NULL); } #endif return err; } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_encap ipencap; struct ip_tunnel_parm_kern p; struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif __u32 fwmark = t->fwmark; int err; if (dev == sitn->fb_tunnel_dev) return -EINVAL; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } ipip6_netlink_parms(data, &p, &fwmark); if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; t = ipip6_tunnel_locate(net, &p, 0); if (t) { if (t->dev != dev) return -EEXIST; } else t = netdev_priv(dev); ipip6_tunnel_update(t, &p, fwmark); #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) return ipip6_tunnel_update_6rd(t, &ip6rd); #endif return 0; } static size_t ipip6_get_size(const struct net_device *dev) { return /* IFLA_IPTUN_LINK */ nla_total_size(4) + /* IFLA_IPTUN_LOCAL */ nla_total_size(4) + /* IFLA_IPTUN_REMOTE */ nla_total_size(4) + /* IFLA_IPTUN_TTL */ nla_total_size(1) + /* IFLA_IPTUN_TOS */ nla_total_size(1) + /* IFLA_IPTUN_PMTUDISC */ nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ nla_total_size(4) + /* IFLA_IPTUN_6RD_PREFIXLEN */ nla_total_size(2) + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ nla_total_size(2) + #endif /* IFLA_IPTUN_ENCAP_TYPE */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_FLAGS */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_SPORT */ nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + /* IFLA_IPTUN_FWMARK */ nla_total_size(4) + 0; } static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_parm_kern *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, ip_tunnel_flags_to_be16(parm->i_flags)) || nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark)) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, &tunnel->ip6rd.prefix) || nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, tunnel->ip6rd.relay_prefixlen)) goto nla_put_failure; #endif if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 }, [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 }, [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, #endif [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static void ipip6_dellink(struct net_device *dev, struct list_head *head) { struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); if (dev != sitn->fb_tunnel_dev) unregister_netdevice_queue(dev, head); } static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { .handler = ipip6_rcv, .err_handler = ipip6_err, .priority = 1, }; static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip6_err, .priority = 2, }; #if IS_ENABLED(CONFIG_MPLS) static struct xfrm_tunnel mplsip_handler __read_mostly = { .handler = mplsip_rcv, .err_handler = ipip6_err, .priority = 2, }; #endif static void __net_exit sit_destroy_tunnels(struct net *net, struct list_head *head) { struct sit_net *sitn = net_generic(net, sit_net_id); struct net_device *dev, *aux; int prio; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &sit_link_ops) unregister_netdevice_queue(dev, head); for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); t = rtnl_dereference(t->next); } } } } static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; sitn->tunnels[1] = sitn->tunnels_l; sitn->tunnels[2] = sitn->tunnels_r; sitn->tunnels[3] = sitn->tunnels_r_l; if (!net_has_fallback_tunnels(net)) return 0; sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!sitn->fb_tunnel_dev) { err = -ENOMEM; goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ sitn->fb_tunnel_dev->netns_local = true; err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { struct net *net; ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) sit_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, .exit_batch_rtnl = sit_exit_batch_rtnl, .id = &sit_net_id, .size = sizeof(struct sit_net), }; static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm4_tunnel_deregister(&ipip_handler, AF_INET); #if IS_ENABLED(CONFIG_MPLS) xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); #endif unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } static int __init sit_init(void) { int err; pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n"); err = register_pernet_device(&sit_net_ops); if (err < 0) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { pr_info("%s: can't register ip4ip4\n", __func__); goto xfrm_tunnel4_failed; } #if IS_ENABLED(CONFIG_MPLS) err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS); if (err < 0) { pr_info("%s: can't register mplsip\n", __func__); goto xfrm_tunnel_mpls_failed; } #endif err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; out: return err; rtnl_link_failed: #if IS_ENABLED(CONFIG_MPLS) xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); xfrm_tunnel_mpls_failed: #endif xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); goto out; } module_init(sit_init); module_exit(sit_cleanup); MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0");
241 243 243 246 189 190 243 243 118 119 238 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BLK_CGROUP_PRIVATE_H #define _BLK_CGROUP_PRIVATE_H /* * block cgroup private header * * Based on ideas and code from CFQ, CFS and BFQ: * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> * * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> * Paolo Valente <paolo.valente@unimore.it> * * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> * Nauman Rafique <nauman@google.com> */ #include <linux/blk-cgroup.h> #include <linux/cgroup.h> #include <linux/kthread.h> #include <linux/blk-mq.h> #include <linux/llist.h> #include "blk.h" struct blkcg_gq; struct blkg_policy_data; /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { BLKG_IOSTAT_READ, BLKG_IOSTAT_WRITE, BLKG_IOSTAT_DISCARD, BLKG_IOSTAT_NR, }; struct blkg_iostat { u64 bytes[BLKG_IOSTAT_NR]; u64 ios[BLKG_IOSTAT_NR]; }; struct blkg_iostat_set { struct u64_stats_sync sync; struct blkcg_gq *blkg; struct llist_node lnode; int lqueued; /* queued in llist */ struct blkg_iostat cur; struct blkg_iostat last; }; /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ struct request_queue *q; struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; /* reference count */ struct percpu_ref refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; struct blkg_iostat_set __percpu *iostat_cpu; struct blkg_iostat_set iostat; struct blkg_policy_data *pd[BLKCG_MAX_POLS]; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO spinlock_t async_bio_lock; struct bio_list async_bios; #endif union { struct work_struct async_bio_work; struct work_struct free_work; }; atomic_t use_delay; atomic64_t delay_nsec; atomic64_t delay_start; u64 last_delay; int last_use; struct rcu_head rcu_head; }; struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; refcount_t online_pin; /* If there is block congestion on this cgroup. */ atomic_t congestion_count; struct radix_tree_root blkg_tree; struct blkcg_gq __rcu *blkg_hint; struct hlist_head blkg_list; struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; struct list_head all_blkcgs_node; /* * List of updated percpu blkg_iostat_set's since the last flush. */ struct llist_head __percpu *lhead; #ifdef CONFIG_BLK_CGROUP_FC_APPID char fc_app_id[FC_APPID_LEN]; #endif #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif }; static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track * information per blkcg - q pair. * * There can be multiple active blkcg policies and each blkg:policy pair is * represented by a blkg_policy_data which is allocated and freed by each * policy's pd_alloc/free_fn() methods. A policy can allocate private data * area by allocating larger data structure which embeds blkg_policy_data * at the beginning. */ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; bool online; }; /* * Policies that need to keep per-blkcg data which is independent from any * request_queue associated to it should implement cpd_alloc/free_fn() * methods. A policy can allocate private data area by allocating larger * data structure which embeds blkcg_policy_data at the beginning. * cpd_init() is invoked to let each policy handle per-blkcg data. */ struct blkcg_policy_data { /* the blkcg and policy id this per-policy data belongs to */ struct blkcg *blkcg; int plid; }; typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp); typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, struct seq_file *s); struct blkcg_policy { int plid; /* cgroup files for the policy */ struct cftype *dfl_cftypes; struct cftype *legacy_cftypes; /* operations */ blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; blkcg_pol_free_cpd_fn *cpd_free_fn; blkcg_pol_alloc_pd_fn *pd_alloc_fn; blkcg_pol_init_pd_fn *pd_init_fn; blkcg_pol_online_pd_fn *pd_online_fn; blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_free_pd_fn *pd_free_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; blkcg_pol_stat_pd_fn *pd_stat_fn; }; extern struct blkcg blkcg_root; extern bool blkcg_debug_stats; void blkg_init_queue(struct request_queue *q); int blkcg_init_disk(struct gendisk *disk); void blkcg_exit_disk(struct gendisk *disk); /* Blkio controller policy registration */ int blkcg_policy_register(struct blkcg_policy *pol); void blkcg_policy_unregister(struct blkcg_policy *pol); int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol); void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol); const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), const struct blkcg_policy *pol, int data, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { char *input; char *body; struct block_device *bdev; struct blkcg_gq *blkg; }; void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input); int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx); void blkg_conf_exit(struct blkg_conf_ctx *ctx); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg * @return: true if this bio needs to be submitted with the root blkg context. * * In order to avoid priority inversions we sometimes need to issue a bio as if * it were attached to the root blkg, and then backcharge to the actual owning * blkg. The idea is we do bio_blkcg_css() to look up the actual context for * the bio and attach the appropriate blkg to the bio. Then we call this helper * and if it is true run with the root blkg for that queue and then do any * backcharging to the originating cgroup once the io is complete. */ static inline bool bio_issue_as_root_blkg(struct bio *bio) { return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; } /** * blkg_lookup - lookup blkg for the specified blkcg - q pair * @blkcg: blkcg of interest * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. * Must be called in a RCU critical section. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { struct blkcg_gq *blkg; if (blkcg == &blkcg_root) return q->root_blkg; blkg = rcu_dereference_check(blkcg->blkg_hint, lockdep_is_held(&q->queue_lock)); if (blkg && blkg->q == q) return blkg; blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); if (blkg && blkg->q != q) blkg = NULL; return blkg; } /** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest * * Return pointer to private data associated with the @blkg-@pol pair. */ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return blkg ? blkg->pd[pol->plid] : NULL; } static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, struct blkcg_policy *pol) { return blkcg ? blkcg->cpd[pol->plid] : NULL; } /** * pdata_to_blkg - get blkg associated with policy private data * @pd: policy private data of interest * * @pd is policy private data. Determine the blkg it's associated with. */ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return pd ? pd->blkg : NULL; } static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) { return cpd ? cpd->blkcg : NULL; } /** * blkg_get - get a blkg reference * @blkg: blkg to get * * The caller should be holding an existing reference. */ static inline void blkg_get(struct blkcg_gq *blkg) { percpu_ref_get(&blkg->refcnt); } /** * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ static inline bool blkg_tryget(struct blkcg_gq *blkg) { return blkg && percpu_ref_tryget(&blkg->refcnt); } /** * blkg_put - put a blkg reference * @blkg: blkg to put */ static inline void blkg_put(struct blkcg_gq *blkg) { percpu_ref_put(&blkg->refcnt); } /** * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants * @d_blkg: loop cursor pointing to the current descendant * @pos_css: used for iteration * @p_blkg: target blkg to walk descendants of * * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU * read locked. If called under either blkcg or queue lock, the iteration * is guaranteed to include all and only online blkgs. The caller may * update @pos_css by calling css_rightmost_descendant() to skip subtree. * @p_blkg is included in the iteration and the first node to be visited. */ #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q))) /** * blkg_for_each_descendant_post - post-order walk of a blkg's descendants * @d_blkg: loop cursor pointing to the current descendant * @pos_css: used for iteration * @p_blkg: target blkg to walk descendants of * * Similar to blkg_for_each_descendant_pre() but performs post-order * traversal instead. Synchronization rules are the same. @p_blkg is * included in the iteration and the last node to be visited. */ #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q))) static inline void blkcg_bio_issue_init(struct bio *bio) { bio_issue_init(&bio->bi_issue, bio_sectors(bio)); } static inline void blkcg_use_delay(struct blkcg_gq *blkg) { if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) return; if (atomic_add_return(1, &blkg->use_delay) == 1) atomic_inc(&blkg->blkcg->congestion_count); } static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); if (WARN_ON_ONCE(old < 0)) return 0; if (old == 0) return 0; /* * We do this song and dance because we can race with somebody else * adding or removing delay. If we just did an atomic_dec we'd end up * negative and we'd already be in trouble. We need to subtract 1 and * then check to see if we were the last delay so we can drop the * congestion count on the cgroup. */ while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1)) ; if (old == 0) return 0; if (old == 1) atomic_dec(&blkg->blkcg->congestion_count); return 1; } /** * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount * @blkg: target blkg * @delay: delay duration in nsecs * * When enabled with this function, the delay is not decayed and must be * explicitly cleared with blkcg_clear_delay(). Must not be mixed with * blkcg_[un]use_delay() and blkcg_add_delay() usages. */ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) { int old = atomic_read(&blkg->use_delay); /* We only want 1 person setting the congestion count for this blkg. */ if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1)) atomic_inc(&blkg->blkcg->congestion_count); atomic64_set(&blkg->delay_nsec, delay); } /** * blkcg_clear_delay - Disable allocator delay mechanism * @blkg: target blkg * * Disable use_delay mechanism. See blkcg_set_delay(). */ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); /* We only want 1 person clearing the congestion count for this blkg. */ if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0)) atomic_dec(&blkg->blkcg->congestion_count); } /** * blk_cgroup_mergeable - Determine whether to allow or disallow merges * @rq: request to merge into * @bio: bio to merge * * @bio and @rq should belong to the same cgroup and their issue_as_root should * match. The latter is necessary as we don't want to throttle e.g. a metadata * update because it happens to be next to a regular IO. */ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return rq->bio->bi_blkg == bio->bi_blkg && bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); } void blk_cgroup_bio_start(struct bio *bio); void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); #else /* CONFIG_BLK_CGROUP */ struct blkg_policy_data { }; struct blkcg_policy_data { }; struct blkcg_policy { }; struct blkcg { }; static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline void blkg_init_queue(struct request_queue *q) { } static inline int blkcg_init_disk(struct gendisk *disk) { return 0; } static inline void blkcg_exit_disk(struct gendisk *disk) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } static inline int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { return 0; } static inline void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return NULL; } static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline void blk_cgroup_bio_start(struct bio *bio) { } static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_PRIVATE_H */
6 2 2 1 3 3 3 7 3 10 10 7 2 14 1 1 3 10 3 9 10 1 1 14 14 12 12 3 9 12 12 6 24 24 24 1 1 1 2 1 1 1 1 4 4 1 1 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing */ #include "multicast.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/icmpv6.h> #include <linux/if_bridge.h> #include <linux/if_ether.h> #include <linux/igmp.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jiffies.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/addrconf.h> #include <net/genetlink.h> #include <net/if_inet6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/sock.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "send.h" #include "soft-interface.h" #include "translation-table.h" #include "tvlv.h" static void batadv_mcast_mla_update(struct work_struct *work); /** * batadv_mcast_start_timer() - schedule the multicast periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) { queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work, msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD)); } /** * batadv_mcast_get_bridge() - get the bridge on top of the softif if it exists * @soft_iface: netdev struct of the mesh interface * * If the given soft interface has a bridge on top then the refcount * of the according net device is increased. * * Return: NULL if no such bridge exists. Otherwise the net device of the * bridge. */ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) { struct net_device *upper = soft_iface; rcu_read_lock(); do { upper = netdev_master_upper_dev_get_rcu(upper); } while (upper && !netif_is_bridge_master(upper)); dev_hold(upper); rcu_read_unlock(); return upper; } /** * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from * node for IPv4 * @dev: the interface to check * * Checks the presence of an IPv4 multicast router on this node. * * Caller needs to hold rcu read lock. * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise. */ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_MFORWARD(in_dev)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR4; } /** * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from * node for IPv6 * @dev: the interface to check * * Checks the presence of an IPv6 multicast router on this node. * * Caller needs to hold rcu read lock. * * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise. */ #if IS_ENABLED(CONFIG_IPV6_MROUTE) static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; } #else static inline u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { return BATADV_MCAST_WANT_NO_RTR6; } #endif /** * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this * node. * * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv, struct net_device *bridge) { struct net_device *dev = bridge ? bridge : bat_priv->soft_iface; u8 flags = BATADV_NO_FLAGS; rcu_read_lock(); flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev); flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev); rcu_read_unlock(); return flags; } /** * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge. * * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv, struct net_device *bridge) { struct net_device *dev = bat_priv->soft_iface; u8 flags = BATADV_NO_FLAGS; if (!bridge) return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; if (!br_multicast_has_router_adjacent(dev, ETH_P_IP)) flags |= BATADV_MCAST_WANT_NO_RTR4; if (!br_multicast_has_router_adjacent(dev, ETH_P_IPV6)) flags |= BATADV_MCAST_WANT_NO_RTR6; return flags; } /** * batadv_mcast_mla_rtr_flags_get() - get multicast router flags * @bat_priv: the bat priv with all the soft interface information * @bridge: bridge interface on top of the soft_iface if present, * otherwise pass NULL * * Checks the presence of IPv4 and IPv6 multicast routers on this * node or behind its bridge. * * Return: * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present * The former two OR'd: no multicast router is present */ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, struct net_device *bridge) { u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6; flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge); flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge); return flags; } /** * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags * @bat_priv: the bat priv with all the soft interface information * * Checks if all active hard interfaces have an MTU larger or equal to 1280 * bytes (IPv6 minimum MTU). * * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise. */ static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) { const struct batadv_hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { rcu_read_unlock(); return BATADV_NO_FLAGS; } } rcu_read_unlock(); return BATADV_MCAST_HAVE_MC_PTYPE_CAPA; } /** * batadv_mcast_mla_flags_get() - get the new multicast flags * @bat_priv: the bat priv with all the soft interface information * * Return: A set of flags for the current/next TVLV, querier and * bridge state. */ static struct batadv_mcast_mla_flags batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) { struct net_device *dev = bat_priv->soft_iface; struct batadv_mcast_querier_state *qr4, *qr6; struct batadv_mcast_mla_flags mla_flags; struct net_device *bridge; bridge = batadv_mcast_get_bridge(dev); memset(&mla_flags, 0, sizeof(mla_flags)); mla_flags.enabled = 1; mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv, bridge); mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv); if (!bridge) return mla_flags; dev_put(bridge); mla_flags.bridged = 1; qr4 = &mla_flags.querier_ipv4; qr6 = &mla_flags.querier_ipv6; if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)) pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); qr4->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); qr4->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); qr6->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6); qr6->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6); mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES; /* 1) If no querier exists at all, then multicast listeners on * our local TT clients behind the bridge will keep silent. * 2) If the selected querier is on one of our local TT clients, * behind the bridge, then this querier might shadow multicast * listeners on our local TT clients, behind this bridge. * * In both cases, we will signalize other batman nodes that * we need all multicast traffic of the according protocol. */ if (!qr4->exists || qr4->shadowing) { mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV4; mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR4; } if (!qr6->exists || qr6->shadowing) { mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV6; mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR6; } return mla_flags; } /** * batadv_mcast_mla_is_duplicate() - check whether an address is in a list * @mcast_addr: the multicast address to check * @mcast_list: the list with multicast addresses to search in * * Return: true if the given address is already in the given list. * Otherwise returns false. */ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; hlist_for_each_entry(mcast_entry, mcast_list, list) if (batadv_compare_eth(mcast_entry->addr, mcast_addr)) return true; return false; } /** * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv4 multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_softif_get_ipv4(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct batadv_hw_addr *new; struct in_device *in_dev; u8 mcast_addr[ETH_ALEN]; struct ip_mc_list *pmc; int ret = 0; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4) return 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return 0; } for (pmc = rcu_dereference(in_dev->mc_list); pmc; pmc = rcu_dereference(pmc->next_rcu)) { if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv4_is_local_multicast(pmc->multiaddr)) continue; if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) && !ipv4_is_local_multicast(pmc->multiaddr)) continue; ip_eth_mc_map(pmc->multiaddr, mcast_addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); ret++; } rcu_read_unlock(); return ret; } /** * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of IPv6 multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ #if IS_ENABLED(CONFIG_IPV6) static int batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct batadv_hw_addr *new; struct inet6_dev *in6_dev; u8 mcast_addr[ETH_ALEN]; struct ifmcaddr6 *pmc6; int ret = 0; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6) return 0; rcu_read_lock(); in6_dev = __in6_dev_get(dev); if (!in6_dev) { rcu_read_unlock(); return 0; } for (pmc6 = rcu_dereference(in6_dev->mc_list); pmc6; pmc6 = rcu_dereference(pmc6->next)) { if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) continue; if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv6_addr_is_ll_all_nodes(&pmc6->mca_addr)) continue; if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) && IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) > IPV6_ADDR_SCOPE_LINKLOCAL) continue; ipv6_eth_mc_map(&pmc6->mca_addr, mcast_addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); ret++; } rcu_read_unlock(); return ret; } #else static inline int batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { return 0; } #endif /** * batadv_mcast_mla_softif_get() - get softif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing * on this kernel on the given soft interface, dev, in * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * * If there is a bridge interface on top of dev, collect from that one * instead. Just like with IP addresses and routes, multicast listeners * will(/should) register to the bridge interface instead of an * enslaved bat0. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_softif_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct net_device *bridge = batadv_mcast_get_bridge(dev); int ret4, ret6 = 0; if (bridge) dev = bridge; ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags); if (ret4 < 0) goto out; ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags); if (ret6 < 0) { ret4 = 0; goto out; } out: dev_put(bridge); return ret4 + ret6; } /** * batadv_mcast_mla_br_addr_cpy() - copy a bridge multicast address * @dst: destination to write to - a multicast MAC address * @src: source to read from - a multicast IP address * * Converts a given multicast IPv4/IPv6 address from a bridge * to its matching multicast MAC address and copies it into the given * destination buffer. * * Caller needs to make sure the destination buffer can hold * at least ETH_ALEN bytes. */ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) { if (src->proto == htons(ETH_P_IP)) ip_eth_mc_map(src->dst.ip4, dst); #if IS_ENABLED(CONFIG_IPV6) else if (src->proto == htons(ETH_P_IPV6)) ipv6_eth_mc_map(&src->dst.ip6, dst); #endif else eth_zero_addr(dst); } /** * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners * @dev: a bridge slave whose bridge to collect multicast addresses from * @mcast_list: a list to put found addresses into * @flags: flags indicating the new multicast state * * Collects multicast addresses of multicast listeners residing * on foreign, non-mesh devices which we gave access to our mesh via * a bridge on top of the given soft interface, dev, in the given * mcast_list. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. */ static int batadv_mcast_mla_bridge_get(struct net_device *dev, struct hlist_head *mcast_list, struct batadv_mcast_mla_flags *flags) { struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list); struct br_ip_list *br_ip_entry, *tmp; u8 tvlv_flags = flags->tvlv_flags; struct batadv_hw_addr *new; u8 mcast_addr[ETH_ALEN]; int ret; /* we don't need to detect these devices/listeners, the IGMP/MLD * snooping code of the Linux bridge already does that for us */ ret = br_multicast_list_adjacent(dev, &bridge_mcast_list); if (ret < 0) goto out; list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) { if (br_ip_entry->addr.proto == htons(ETH_P_IP)) { if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4) continue; if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4)) continue; if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) && !ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4)) continue; } #if IS_ENABLED(CONFIG_IPV6) if (br_ip_entry->addr.proto == htons(ETH_P_IPV6)) { if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6) continue; if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.dst.ip6)) continue; if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) && IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.dst.ip6) > IPV6_ADDR_SCOPE_LINKLOCAL) continue; } #endif batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr); if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) continue; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; } ether_addr_copy(new->addr, mcast_addr); hlist_add_head(&new->list, mcast_list); } out: list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) { list_del(&br_ip_entry->list); kfree(br_ip_entry); } return ret; } /** * batadv_mcast_mla_list_free() - free a list of multicast addresses * @mcast_list: the list to free * * Removes and frees all items in the given mcast_list. */ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { hlist_del(&mcast_entry->list); kfree(mcast_entry); } } /** * batadv_mcast_mla_tt_retract() - clean up multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which should _not_ be removed * * Retracts the announcement of any multicast listener from the * translation table except the ones listed in the given mcast_list. * * If mcast_list is NULL then all are retracted. */ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { if (mcast_list && batadv_mcast_mla_is_duplicate(mcast_entry->addr, mcast_list)) continue; batadv_tt_local_remove(bat_priv, mcast_entry->addr, BATADV_NO_FLAGS, "mcast TT outdated", false); hlist_del(&mcast_entry->list); kfree(mcast_entry); } } /** * batadv_mcast_mla_tt_add() - add multicast listener announcements * @bat_priv: the bat priv with all the soft interface information * @mcast_list: a list of addresses which are going to get added * * Adds multicast listener announcements from the given mcast_list to the * translation table if they have not been added yet. */ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; if (!mcast_list) return; hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { if (batadv_mcast_mla_is_duplicate(mcast_entry->addr, &bat_priv->mcast.mla_list)) continue; if (!batadv_tt_local_add(bat_priv->soft_iface, mcast_entry->addr, BATADV_NO_FLAGS, BATADV_NULL_IFINDEX, BATADV_NO_MARK)) continue; hlist_del(&mcast_entry->list); hlist_add_head(&mcast_entry->list, &bat_priv->mcast.mla_list); } } /** * batadv_mcast_querier_log() - debug output regarding the querier status on * link * @bat_priv: the bat priv with all the soft interface information * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") * @old_state: the previous querier state on our link * @new_state: the new querier state on our link * * Outputs debug messages to the logging facility with log level 'mcast' * regarding changes to the querier status on the link which are relevant * to our multicast optimizations. * * Usually this is about whether a querier appeared or vanished in * our mesh or whether the querier is in the suboptimal position of being * behind our local bridge segment: Snooping switches will directly * forward listener reports to the querier, therefore batman-adv and * the bridge will potentially not see these listeners - the querier is * potentially shadowing listeners from us then. * * This is only interesting for nodes with a bridge on top of their * soft interface. */ static void batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, struct batadv_mcast_querier_state *old_state, struct batadv_mcast_querier_state *new_state) { if (!old_state->exists && new_state->exists) batadv_info(bat_priv->soft_iface, "%s Querier appeared\n", str_proto); else if (old_state->exists && !new_state->exists) batadv_info(bat_priv->soft_iface, "%s Querier disappeared - multicast optimizations disabled\n", str_proto); else if (!bat_priv->mcast.mla_flags.bridged && !new_state->exists) batadv_info(bat_priv->soft_iface, "No %s Querier present - multicast optimizations disabled\n", str_proto); if (new_state->exists) { if ((!old_state->shadowing && new_state->shadowing) || (!old_state->exists && new_state->shadowing)) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "%s Querier is behind our bridged segment: Might shadow listeners\n", str_proto); else if (old_state->shadowing && !new_state->shadowing) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "%s Querier is not behind our bridged segment\n", str_proto); } } /** * batadv_mcast_bridge_log() - debug output for topology changes in bridged * setups * @bat_priv: the bat priv with all the soft interface information * @new_flags: flags indicating the new multicast state * * If no bridges are ever used on this node, then this function does nothing. * * Otherwise this function outputs debug information to the 'mcast' log level * which might be relevant to our multicast optimizations. * * More precisely, it outputs information when a bridge interface is added or * removed from a soft interface. And when a bridge is present, it further * outputs information about the querier state which is relevant for the * multicast flags this node is going to set. */ static void batadv_mcast_bridge_log(struct batadv_priv *bat_priv, struct batadv_mcast_mla_flags *new_flags) { struct batadv_mcast_mla_flags *old_flags = &bat_priv->mcast.mla_flags; if (!old_flags->bridged && new_flags->bridged) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Bridge added: Setting Unsnoopables(U)-flag\n"); else if (old_flags->bridged && !new_flags->bridged) batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Bridge removed: Unsetting Unsnoopables(U)-flag\n"); if (new_flags->bridged) { batadv_mcast_querier_log(bat_priv, "IGMP", &old_flags->querier_ipv4, &new_flags->querier_ipv4); batadv_mcast_querier_log(bat_priv, "MLD", &old_flags->querier_ipv6, &new_flags->querier_ipv6); } } /** * batadv_mcast_flags_log() - output debug information about mcast flag changes * @bat_priv: the bat priv with all the soft interface information * @flags: TVLV flags indicating the new multicast state * * Whenever the multicast TVLV flags this node announces change, this function * should be used to notify userspace about the change. */ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { bool old_enabled = bat_priv->mcast.mla_flags.enabled; u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags; char str_old_flags[] = "[.... . .]"; sprintf(str_old_flags, "[%c%c%c%s%s%c]", (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); batadv_dbg(BATADV_DBG_MCAST, bat_priv, "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n", old_enabled ? str_old_flags : "<undefined>", (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); } /** * batadv_mcast_mla_flags_update() - update multicast flags * @bat_priv: the bat priv with all the soft interface information * @flags: flags indicating the new multicast state * * Updates the own multicast tvlv with our current multicast related settings, * capabilities and inabilities. */ static void batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv, struct batadv_mcast_mla_flags *flags) { struct batadv_tvlv_mcast_data mcast_data; if (!memcmp(flags, &bat_priv->mcast.mla_flags, sizeof(*flags))) return; batadv_mcast_bridge_log(bat_priv, flags); batadv_mcast_flags_log(bat_priv, flags->tvlv_flags); mcast_data.flags = flags->tvlv_flags; memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved)); batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2, &mcast_data, sizeof(mcast_data)); bat_priv->mcast.mla_flags = *flags; } /** * __batadv_mcast_mla_update() - update the own MLAs * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. * * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are * ensured by the non-parallel execution of the worker this function * belongs to. */ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { struct net_device *soft_iface = bat_priv->soft_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; struct batadv_mcast_mla_flags flags; int ret; flags = batadv_mcast_mla_flags_get(bat_priv); ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags); if (ret < 0) goto out; ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags); if (ret < 0) goto out; spin_lock(&bat_priv->mcast.mla_lock); batadv_mcast_mla_tt_retract(bat_priv, &mcast_list); batadv_mcast_mla_tt_add(bat_priv, &mcast_list); batadv_mcast_mla_flags_update(bat_priv, &flags); spin_unlock(&bat_priv->mcast.mla_lock); out: batadv_mcast_mla_list_free(&mcast_list); } /** * batadv_mcast_mla_update() - update the own MLAs * @work: kernel work struct * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. * * In the end, reschedules the work timer. */ static void batadv_mcast_mla_update(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_priv_mcast *priv_mcast; struct batadv_priv *bat_priv; delayed_work = to_delayed_work(work); priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work); bat_priv = container_of(priv_mcast, struct batadv_priv, mcast); __batadv_mcast_mla_update(bat_priv); batadv_mcast_start_timer(bat_priv); } /** * batadv_mcast_is_report_ipv4() - check for IGMP reports * @skb: the ethernet frame destined for the mesh * * This call might reallocate skb data. * * Checks whether the given frame is a valid IGMP report. * * Return: If so then true, otherwise false. */ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) { if (ip_mc_check_igmp(skb) < 0) return false; switch (igmp_hdr(skb)->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: case IGMPV3_HOST_MEMBERSHIP_REPORT: return true; } return false; } /** * batadv_mcast_forw_mode_check_ipv4() - check for optimized forwarding * potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv4 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable * * Checks whether the given IPv4 packet has the potential to be forwarded with a * mode more optimal than classic flooding. * * Return: If so then 0. Otherwise -EINVAL or -ENOMEM in case of memory * allocation failure. */ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, struct sk_buff *skb, bool *is_unsnoopable, int *is_routable) { struct iphdr *iphdr; /* We might fail due to out-of-memory -> drop it */ if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr))) return -ENOMEM; if (batadv_mcast_is_report_ipv4(skb)) return -EINVAL; iphdr = ip_hdr(skb); /* link-local multicast listeners behind a bridge are * not snoopable (see RFC4541, section 2.1.2.2) */ if (ipv4_is_local_multicast(iphdr->daddr)) *is_unsnoopable = true; else *is_routable = ETH_P_IP; return 0; } /** * batadv_mcast_is_report_ipv6() - check for MLD reports * @skb: the ethernet frame destined for the mesh * * This call might reallocate skb data. * * Checks whether the given frame is a valid MLD report. * * Return: If so then true, otherwise false. */ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) { if (ipv6_mc_check_mld(skb) < 0) return false; switch (icmp6_hdr(skb)->icmp6_type) { case ICMPV6_MGM_REPORT: case ICMPV6_MLD2_REPORT: return true; } return false; } /** * batadv_mcast_forw_mode_check_ipv6() - check for optimized forwarding * potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv6 packet to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable * * Checks whether the given IPv6 packet has the potential to be forwarded with a * mode more optimal than classic flooding. * * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory */ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, struct sk_buff *skb, bool *is_unsnoopable, int *is_routable) { struct ipv6hdr *ip6hdr; /* We might fail due to out-of-memory -> drop it */ if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr))) return -ENOMEM; if (batadv_mcast_is_report_ipv6(skb)) return -EINVAL; ip6hdr = ipv6_hdr(skb); if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) < IPV6_ADDR_SCOPE_LINKLOCAL) return -EINVAL; /* link-local-all-nodes multicast listeners behind a bridge are * not snoopable (see RFC4541, section 3, paragraph 3) */ if (ipv6_addr_is_ll_all_nodes(&ip6hdr->daddr)) *is_unsnoopable = true; else if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) > IPV6_ADDR_SCOPE_LINKLOCAL) *is_routable = ETH_P_IPV6; return 0; } /** * batadv_mcast_forw_mode_check() - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast frame to check * @is_unsnoopable: stores whether the destination is snoopable * @is_routable: stores whether the destination is routable * * Checks whether the given multicast ethernet frame has the potential to be * forwarded with a mode more optimal than classic flooding. * * Return: If so then 0. Otherwise -EINVAL is or -ENOMEM if we are out of memory */ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, struct sk_buff *skb, bool *is_unsnoopable, int *is_routable) { struct ethhdr *ethhdr = eth_hdr(skb); if (!atomic_read(&bat_priv->multicast_mode)) return -EINVAL; switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable, is_routable); case ETH_P_IPV6: if (!IS_ENABLED(CONFIG_IPV6)) return -EINVAL; return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable, is_routable); default: return -EINVAL; } } /** * batadv_mcast_forw_want_all_ip_count() - count nodes with unspecific mcast * interest * @bat_priv: the bat priv with all the soft interface information * @ethhdr: ethernet header of a packet * * Return: the number of nodes which want all IPv4 multicast traffic if the * given ethhdr is from an IPv4 packet or the number of nodes which want all * IPv6 traffic if it matches an IPv6 packet. */ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv, struct ethhdr *ethhdr) { switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: return atomic_read(&bat_priv->mcast.num_want_all_ipv4); case ETH_P_IPV6: return atomic_read(&bat_priv->mcast.num_want_all_ipv6); default: /* we shouldn't be here... */ return 0; } } /** * batadv_mcast_forw_rtr_count() - count nodes with a multicast router * @bat_priv: the bat priv with all the soft interface information * @protocol: the ethernet protocol type to count multicast routers for * * Return: the number of nodes which want all routable IPv4 multicast traffic * if the protocol is ETH_P_IP or the number of nodes which want all routable * IPv6 traffic if the protocol is ETH_P_IPV6. Otherwise returns 0. */ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, int protocol) { switch (protocol) { case ETH_P_IP: return atomic_read(&bat_priv->mcast.num_want_all_rtr4); case ETH_P_IPV6: return atomic_read(&bat_priv->mcast.num_want_all_rtr6); default: return 0; } } /** * batadv_mcast_forw_mode_by_count() - get forwarding mode by count * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * @count: the number of originators the multicast packet need to be sent to * * For a multicast packet with multiple destination originators, checks which * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a * complete batman-adv multicast header. * * Return: * BATADV_FORW_MCAST: If all nodes have multicast packet routing * capabilities and an MTU >= 1280 on all hard interfaces (including us) * and the encapsulated multicast packet with all destination addresses * would still fit into an 1280 bytes batman-adv multicast packet * (excluding the outer ethernet frame) and we could successfully push * the full batman-adv multicast packet header. * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv * multicast packet and the amount of batman-adv unicast packets needed * is smaller or equal to the configured multicast fanout. * BATADV_FORW_BCAST: Otherwise. */ static enum batadv_forw_mode batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable, int count) { unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count); u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags; if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) && own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && skb->len + mcast_hdrlen <= IPV6_MIN_MTU && batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count)) return BATADV_FORW_MCAST; if (count <= atomic_read(&bat_priv->multicast_fanout)) return BATADV_FORW_UCASTS; return BATADV_FORW_BCAST; } /** * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Return: The forwarding mode as enum batadv_forw_mode. */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; struct ethhdr *ethhdr; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) return BATADV_FORW_BCAST; ethhdr = eth_hdr(skb); tt_count = batadv_tt_global_hash_count(bat_priv, ethhdr->h_dest, BATADV_NO_FLAGS); ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; if (!total_count) return BATADV_FORW_NONE; else if (unsnoop_count) return BATADV_FORW_BCAST; return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable, total_count); } /** * batadv_mcast_forw_send_orig() - send a multicast packet to an originator * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to send * @vid: the vlan identifier * @orig_node: the originator to send the packet to * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, struct batadv_orig_node *orig_node) { /* Avoid sending multicast-in-unicast packets to other BLA * gateways - they already got the frame from the LAN side * we share with them. * TODO: Refactor to take BLA into account earlier, to avoid * reducing the mcast_fanout count. */ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) { dev_kfree_skb(skb); return NET_XMIT_SUCCESS; } return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, orig_node, vid); } /** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any multicast * listener registered in the translation table. A transmission is performed * via a batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; struct batadv_tt_orig_list_entry *orig_entry; struct batadv_tt_global_entry *tt_global; const u8 *addr = eth_hdr(skb)->h_dest; tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global) goto out; rcu_read_lock(); hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_entry->orig_node); } rcu_read_unlock(); batadv_tt_global_entry_put(tt_global); out: return ret; } /** * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4 * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_ALL_IPV4 flag set. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_ipv4_list, mcast_want_all_ipv4_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6 * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_ALL_IPV6 flag set. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_ipv6_list, mcast_want_all_ipv6_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_ALL_IPV4 or BATADV_MCAST_WANT_ALL_IPV6 flag set. A * transmission is performed via a batman-adv unicast packet for each such * destination node. * * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ static int batadv_mcast_forw_want_all(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { switch (ntohs(eth_hdr(skb)->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_want_all_ipv4(bat_priv, skb, vid); case ETH_P_IPV6: return batadv_mcast_forw_want_all_ipv6(bat_priv, skb, vid); default: /* we shouldn't be here... */ return NET_XMIT_DROP; } } /** * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4 * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR4 flag unset. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_rtr4_list, mcast_want_all_rtr4_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6 * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR6 flag unset. A transmission is performed via a * batman-adv unicast packet for each such destination node. * * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS * otherwise. */ static int batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret = NET_XMIT_SUCCESS; struct sk_buff *newskb; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, &bat_priv->mcast.want_all_rtr6_list, mcast_want_all_rtr6_node) { newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) { ret = NET_XMIT_DROP; break; } batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; } /** * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * * Sends copies of a frame with multicast destination to any node with a * BATADV_MCAST_WANT_NO_RTR4 or BATADV_MCAST_WANT_NO_RTR6 flag unset. A * transmission is performed via a batman-adv unicast packet for each such * destination node. * * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ static int batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { switch (ntohs(eth_hdr(skb)->h_proto)) { case ETH_P_IP: return batadv_mcast_forw_want_all_rtr4(bat_priv, skb, vid); case ETH_P_IPV6: return batadv_mcast_forw_want_all_rtr6(bat_priv, skb, vid); default: /* we shouldn't be here... */ return NET_XMIT_DROP; } } /** * batadv_mcast_forw_send() - send packet to any detected multicast recipient * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according * want-all flags. A transmission is performed via a batman-adv unicast packet * for each such destination node. * * The given skb is consumed/freed. * * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable) { int ret; ret = batadv_mcast_forw_tt(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } ret = batadv_mcast_forw_want_all(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } if (!is_routable) goto skip_mc_router; ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } skip_mc_router: consume_skb(skb); return ret; } /** * batadv_mcast_want_unsnoop_update() - update unsnoop counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, * orig, has toggled then this method updates the counter and the list * accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node; struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) { atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) && orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) { atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_want_ipv4_update() - update want-all-ipv4 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_ipv4_node; struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) && orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) { atomic_dec(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_want_ipv6_update() - update want-all-ipv6 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_ipv6_node; struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) && orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) { atomic_dec(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_rtr4_node; struct hlist_head *head = &bat_priv->mcast.want_all_rtr4_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag set to unset */ if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR4) && orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4) { atomic_inc(&bat_priv->mcast.num_want_all_rtr4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag unset to set */ } else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR4 && !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4)) { atomic_dec(&bat_priv->mcast.num_want_all_rtr4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_rtr6_node; struct hlist_head *head = &bat_priv->mcast.want_all_rtr6_list; lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag set to unset */ if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR6) && orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6) { atomic_inc(&bat_priv->mcast.num_want_all_rtr6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(!hlist_unhashed(node)); hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag unset to set */ } else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR6 && !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6)) { atomic_dec(&bat_priv->mcast.num_want_all_rtr6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); /* flag checks above + mcast_handler_lock prevents this */ WARN_ON(hlist_unhashed(node)); hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } /** * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node which multicast state might have changed of * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has * toggled then this method updates the counter accordingly. */ static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 mcast_flags) { lockdep_assert_held(&orig->mcast_handler_lock); /* switched from flag set to unset */ if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) && orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa); /* switched from flag unset to set */ else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)) atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa); } /** * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV * @enabled: whether the originator has multicast TVLV support enabled * @tvlv_value: tvlv buffer containing the multicast flags * @tvlv_value_len: tvlv buffer length * * Return: multicast flags for the given tvlv buffer */ static u8 batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len) { u8 mcast_flags = BATADV_NO_FLAGS; if (enabled && tvlv_value && tvlv_value_len >= sizeof(mcast_flags)) mcast_flags = *(u8 *)tvlv_value; if (!enabled) { mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4; mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6; } /* remove redundant flags to avoid sending duplicate packets later */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) mcast_flags |= BATADV_MCAST_WANT_NO_RTR4; if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; return mcast_flags; } /** * batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data * @tvlv_value_len: tvlv buffer length */ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 flags, void *tvlv_value, u16 tvlv_value_len) { bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); u8 mcast_flags; mcast_flags = batadv_mcast_tvlv_flags_get(orig_mcast_enabled, tvlv_value, tvlv_value_len); spin_lock_bh(&orig->mcast_handler_lock); if (orig_mcast_enabled && !test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } else if (!orig_mcast_enabled && test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { clear_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags); batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; spin_unlock_bh(&orig->mcast_handler_lock); } /** * batadv_mcast_init() - initialize the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_init(struct batadv_priv *bat_priv) { batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); batadv_tvlv_handler_register(bat_priv, NULL, NULL, batadv_mcast_forw_tracker_tvlv_handler, BATADV_TVLV_MCAST_TRACKER, 1, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); batadv_mcast_start_timer(bat_priv); } /** * batadv_mcast_mesh_info_put() - put multicast info into a netlink message * @msg: buffer for the message * @bat_priv: the bat priv with all the soft interface information * * Return: 0 or error code. */ int batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv) { u32 flags = bat_priv->mcast.mla_flags.tvlv_flags; u32 flags_priv = BATADV_NO_FLAGS; if (bat_priv->mcast.mla_flags.bridged) { flags_priv |= BATADV_MCAST_FLAGS_BRIDGED; if (bat_priv->mcast.mla_flags.querier_ipv4.exists) flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS; if (bat_priv->mcast.mla_flags.querier_ipv6.exists) flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS; if (bat_priv->mcast.mla_flags.querier_ipv4.shadowing) flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING; if (bat_priv->mcast.mla_flags.querier_ipv6.shadowing) flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING; } if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) || nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv)) return -EMSGSIZE; return 0; } /** * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table * to a netlink socket * @msg: buffer for the message * @portid: netlink port * @cb: Control block containing additional options * @orig_node: originator to dump the multicast flags of * * Return: 0 or error code. */ static int batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_orig_node *orig_node) { void *hdr; hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS); if (!hdr) return -ENOBUFS; genl_dump_check_consistent(cb, hdr); if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig)) { genlmsg_cancel(msg, hdr); return -EMSGSIZE; } if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig_node->capabilities)) { if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, orig_node->mcast_flags)) { genlmsg_cancel(msg, hdr); return -EMSGSIZE; } } genlmsg_end(msg, hdr); return 0; } /** * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags * table to a netlink socket * @msg: buffer for the message * @portid: netlink port * @cb: Control block containing additional options * @hash: hash to dump * @bucket: bucket index to dump * @idx_skip: How many entries to skip * * Return: 0 or error code. */ static int batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_hashtable *hash, unsigned int bucket, long *idx_skip) { struct batadv_orig_node *orig_node; long idx = 0; spin_lock_bh(&hash->list_locks[bucket]); cb->seq = atomic_read(&hash->generation) << 1 | 1; hlist_for_each_entry(orig_node, &hash->table[bucket], hash_entry) { if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig_node->capa_initialized)) continue; if (idx < *idx_skip) goto skip; if (batadv_mcast_flags_dump_entry(msg, portid, cb, orig_node)) { spin_unlock_bh(&hash->list_locks[bucket]); *idx_skip = idx; return -EMSGSIZE; } skip: idx++; } spin_unlock_bh(&hash->list_locks[bucket]); return 0; } /** * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket * @msg: buffer for the message * @portid: netlink port * @cb: Control block containing additional options * @bat_priv: the bat priv with all the soft interface information * @bucket: current bucket to dump * @idx: index in current bucket to the next entry to dump * * Return: 0 or error code. */ static int __batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, long *bucket, long *idx) { struct batadv_hashtable *hash = bat_priv->orig_hash; long bucket_tmp = *bucket; long idx_tmp = *idx; while (bucket_tmp < hash->size) { if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash, bucket_tmp, &idx_tmp)) break; bucket_tmp++; idx_tmp = 0; } *bucket = bucket_tmp; *idx = idx_tmp; return msg->len; } /** * batadv_mcast_netlink_get_primary() - get primary interface from netlink * callback * @cb: netlink callback structure * @primary_if: the primary interface pointer to return the result in * * Return: 0 or error code. */ static int batadv_mcast_netlink_get_primary(struct netlink_callback *cb, struct batadv_hard_iface **primary_if) { struct batadv_hard_iface *hard_iface = NULL; struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_priv *bat_priv; int ifindex; int ret = 0; ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return -EINVAL; soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { ret = -ENODEV; goto out; } bat_priv = netdev_priv(soft_iface); hard_iface = batadv_primary_if_get_selected(bat_priv); if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; goto out; } out: dev_put(soft_iface); if (!ret && primary_if) *primary_if = hard_iface; else batadv_hardif_put(hard_iface); return ret; } /** * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket * @msg: buffer for the message * @cb: callback structure containing arguments * * Return: message length. */ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct batadv_hard_iface *primary_if = NULL; int portid = NETLINK_CB(cb->skb).portid; struct batadv_priv *bat_priv; long *bucket = &cb->args[0]; long *idx = &cb->args[1]; int ret; ret = batadv_mcast_netlink_get_primary(cb, &primary_if); if (ret) return ret; bat_priv = netdev_priv(primary_if->soft_iface); ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx); batadv_hardif_put(primary_if); return ret; } /** * batadv_mcast_free() - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ batadv_mcast_mla_tt_retract(bat_priv, NULL); } /** * batadv_mcast_purge_orig() - reset originator global mcast state modifications * @orig: the originator which is going to get purged */ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; spin_lock_bh(&orig->mcast_handler_lock); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); batadv_mcast_have_mc_ptype_update(bat_priv, orig, BATADV_MCAST_HAVE_MC_PTYPE_CAPA); spin_unlock_bh(&orig->mcast_handler_lock); }
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H #include <linux/uaccess.h> #include <linux/ptrace.h> #include <asm/cpu_entry_area.h> #include <asm/switch_to.h> enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; struct stack_info { enum stack_type type; unsigned long *begin, *end, *next_sp; }; bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, struct stack_info *info); static __always_inline bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) { /* make sure it's not in the stack proper */ if (get_stack_info_noinstr(stack, current, info)) return false; /* but if it is in the page below it, we hit a guard */ return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); } const char *stack_type_name(enum stack_type type); static inline bool on_stack(struct stack_info *info, void *addr, size_t len) { void *begin = info->begin; void *end = info->end; return (info->type != STACK_TYPE_UNKNOWN && addr >= begin && addr < end && addr + len > begin && addr + len <= end); } #ifdef CONFIG_X86_32 #define STACKSLOTS_PER_LINE 8 #else #define STACKSLOTS_PER_LINE 4 #endif #ifdef CONFIG_FRAME_POINTER static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); return &((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { return NULL; } #endif /* CONFIG_FRAME_POINTER */ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); return (unsigned long *)task->thread.sp; } /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; unsigned long return_address; }; struct stack_frame_ia32 { u32 next_frame; u32 return_address; }; void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */
419 17 17 1 1264 1277 73 56 19 16 16 91 93 2 79 79 79 73 9 7 7 9 7 1 3 3 2 8 9 12 12 11 6 6 6 12 12 9 12 2 2 2 50 18 18 175 176 137 50 158 27 26 2 25 20 2 22 20 2 3 95 157 160 150 97 87 14 157 10 2 7 7 7 7 45 157 156 153 4 156 1 152 4 159 159 7 153 9 9 2 9 167 116 11 166 167 37 27 13 3 30 17 5 5 1 1 7 20 8 2 3 3 3 1 2 23 1 12 3 9 9 2 20 20 28 5 30 30 1 40 40 40 40 40 58 9 2 6 5 1 51 2 1 1 12 40 20 23 6 3 4 1 2 1 2 1 68 1 68 69 3 4 6 6 3 1 3 6 3 4 2 6 6 2 2 2 2 2 1 1 1 2 106 71 20 9 167 57 2 3 5 5 1 1 1 1 1 1 2 3 8 39 85 7 2 66 3 69 3 499 1 382 94 472 40 473 384 364 23 25 1 1 24 501 499 11 419 72 420 477 26 455 72 358 479 10 22 10 31 1 2 6 1 24 1 23 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 // SPDX-License-Identifier: GPL-2.0 /* * linux/mm/madvise.c * * Copyright (C) 1999 Linus Torvalds * Copyright (C) 2002 Christoph Hellwig */ #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/syscalls.h> #include <linux/mempolicy.h> #include <linux/page-isolation.h> #include <linux/page_idle.h> #include <linux/userfaultfd_k.h> #include <linux/hugetlb.h> #include <linux/falloc.h> #include <linux/fadvise.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/mm_inline.h> #include <linux/string.h> #include <linux/uio.h> #include <linux/ksm.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/pagewalk.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> #include <asm/tlb.h> #include "internal.h" #include "swap.h" /* * Maximum number of attempts we make to install guard pages before we give up * and return -ERESTARTNOINTR to have userspace try again. */ #define MAX_MADVISE_GUARD_RETRIES 3 struct madvise_walk_private { struct mmu_gather *tlb; bool pageout; }; /* * Any behaviour which results in changes to the vma->vm_flags needs to * take mmap_lock for writing. Others, which simply traverse vmas, need * to only take it for reading. */ static int madvise_need_mmap_write(int behavior) { switch (behavior) { case MADV_REMOVE: case MADV_WILLNEED: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: case MADV_COLD: case MADV_PAGEOUT: case MADV_FREE: case MADV_POPULATE_READ: case MADV_POPULATE_WRITE: case MADV_COLLAPSE: case MADV_GUARD_INSTALL: case MADV_GUARD_REMOVE: return 0; default: /* be safe, default to 1. list exceptions explicitly */ return 1; } } #ifdef CONFIG_ANON_VMA_NAME struct anon_vma_name *anon_vma_name_alloc(const char *name) { struct anon_vma_name *anon_name; size_t count; /* Add 1 for NUL terminator at the end of the anon_name->name */ count = strlen(name) + 1; anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL); if (anon_name) { kref_init(&anon_name->kref); memcpy(anon_name->name, name, count); } return anon_name; } void anon_vma_name_free(struct kref *kref) { struct anon_vma_name *anon_name = container_of(kref, struct anon_vma_name, kref); kfree(anon_name); } struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { mmap_assert_locked(vma->vm_mm); return vma->anon_name; } /* mmap_lock should be write-locked */ static int replace_anon_vma_name(struct vm_area_struct *vma, struct anon_vma_name *anon_name) { struct anon_vma_name *orig_name = anon_vma_name(vma); if (!anon_name) { vma->anon_name = NULL; anon_vma_name_put(orig_name); return 0; } if (anon_vma_name_eq(orig_name, anon_name)) return 0; vma->anon_name = anon_vma_name_reuse(anon_name); anon_vma_name_put(orig_name); return 0; } #else /* CONFIG_ANON_VMA_NAME */ static int replace_anon_vma_name(struct vm_area_struct *vma, struct anon_vma_name *anon_name) { if (anon_name) return -EINVAL; return 0; } #endif /* CONFIG_ANON_VMA_NAME */ /* * Update the vm_flags on region of a vma, splitting it or merging it as * necessary. Must be called with mmap_lock held for writing; * Caller should ensure anon_name stability by raising its refcount even when * anon_name belongs to a valid vma because this function might free that vma. */ static int madvise_update_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long new_flags, struct anon_vma_name *anon_name) { struct mm_struct *mm = vma->vm_mm; int error; VMA_ITERATOR(vmi, mm, start); if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { *prev = vma; return 0; } vma = vma_modify_flags_name(&vmi, *prev, vma, start, end, new_flags, anon_name); if (IS_ERR(vma)) return PTR_ERR(vma); *prev = vma; /* vm_flags is protected by the mmap_lock held in write mode. */ vma_start_write(vma); vm_flags_reset(vma, new_flags); if (!vma->vm_file || vma_is_anon_shmem(vma)) { error = replace_anon_vma_name(vma, anon_name); if (error) return error; } return 0; } #ifdef CONFIG_SWAP static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->private; struct swap_iocb *splug = NULL; pte_t *ptep = NULL; spinlock_t *ptl; unsigned long addr; for (addr = start; addr < end; addr += PAGE_SIZE) { pte_t pte; swp_entry_t entry; struct folio *folio; if (!ptep++) { ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!ptep) break; } pte = ptep_get(ptep); if (!is_swap_pte(pte)) continue; entry = pte_to_swp_entry(pte); if (unlikely(non_swap_entry(entry))) continue; pte_unmap_unlock(ptep, ptl); ptep = NULL; folio = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, vma, addr, &splug); if (folio) folio_put(folio); } if (ptep) pte_unmap_unlock(ptep, ptl); swap_read_unplug(splug); cond_resched(); return 0; } static const struct mm_walk_ops swapin_walk_ops = { .pmd_entry = swapin_walk_pmd_entry, .walk_lock = PGWALK_RDLOCK, }; static void shmem_swapin_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct address_space *mapping) { XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); pgoff_t end_index = linear_page_index(vma, end) - 1; struct folio *folio; struct swap_iocb *splug = NULL; rcu_read_lock(); xas_for_each(&xas, folio, end_index) { unsigned long addr; swp_entry_t entry; if (!xa_is_value(folio)) continue; entry = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. */ if (non_swap_entry(entry)) continue; addr = vma->vm_start + ((xas.xa_index - vma->vm_pgoff) << PAGE_SHIFT); xas_pause(&xas); rcu_read_unlock(); folio = read_swap_cache_async(entry, mapping_gfp_mask(mapping), vma, addr, &splug); if (folio) folio_put(folio); rcu_read_lock(); } rcu_read_unlock(); swap_read_unplug(splug); } #endif /* CONFIG_SWAP */ /* * Schedule all required I/O operations. Do not wait for completion. */ static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; loff_t offset; *prev = vma; #ifdef CONFIG_SWAP if (!file) { walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma); lru_add_drain(); /* Push any new pages onto the LRU now */ return 0; } if (shmem_mapping(file->f_mapping)) { shmem_swapin_range(vma, start, end, file->f_mapping); lru_add_drain(); /* Push any new pages onto the LRU now */ return 0; } #else if (!file) return -EBADF; #endif if (IS_DAX(file_inode(file))) { /* no bad return value, but ignore advice */ return 0; } /* * Filesystem's fadvise may need to take various locks. We need to * explicitly grab a reference because the vma (and hence the * vma's reference to the file) can go away as soon as we drop * mmap_lock. */ *prev = NULL; /* tell sys_madvise we drop mmap_lock */ get_file(file); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); mmap_read_unlock(mm); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); mmap_read_lock(mm); return 0; } static inline bool can_do_file_pageout(struct vm_area_struct *vma) { if (!vma->vm_file) return false; /* * paging out pagecache only for non-anonymous mappings that correspond * to the files the calling process could (if tried) open for writing; * otherwise we'd be including shared non-exclusive mappings, which * opens a side channel. */ return inode_owner_or_capable(&nop_mnt_idmap, file_inode(vma->vm_file)) || file_permission(vma->vm_file, MAY_WRITE) == 0; } static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end, struct folio *folio, pte_t *ptep, pte_t pte, bool *any_young, bool *any_dirty) { const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; int max_nr = (end - addr) / PAGE_SIZE; return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, any_young, any_dirty); } static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct madvise_walk_private *private = walk->private; struct mmu_gather *tlb = private->tlb; bool pageout = private->pageout; struct mm_struct *mm = tlb->mm; struct vm_area_struct *vma = walk->vma; pte_t *start_pte, *pte, ptent; spinlock_t *ptl; struct folio *folio = NULL; LIST_HEAD(folio_list); bool pageout_anon_only_filter; unsigned int batch_count = 0; int nr; if (fatal_signal_pending(current)) return -EINTR; pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) && !can_do_file_pageout(vma); #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge(*pmd)) { pmd_t orig_pmd; unsigned long next = pmd_addr_end(addr, end); tlb_change_page_size(tlb, HPAGE_PMD_SIZE); ptl = pmd_trans_huge_lock(pmd, vma); if (!ptl) return 0; orig_pmd = *pmd; if (is_huge_zero_pmd(orig_pmd)) goto huge_unlock; if (unlikely(!pmd_present(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(orig_pmd)); goto huge_unlock; } folio = pmd_folio(orig_pmd); /* Do not interfere with other mappings of this folio */ if (folio_likely_mapped_shared(folio)) goto huge_unlock; if (pageout_anon_only_filter && !folio_test_anon(folio)) goto huge_unlock; if (next - addr != HPAGE_PMD_SIZE) { int err; folio_get(folio); spin_unlock(ptl); folio_lock(folio); err = split_folio(folio); folio_unlock(folio); folio_put(folio); if (!err) goto regular_folio; return 0; } if (!pageout && pmd_young(orig_pmd)) { pmdp_invalidate(vma, addr, pmd); orig_pmd = pmd_mkold(orig_pmd); set_pmd_at(mm, addr, pmd, orig_pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); } folio_clear_referenced(folio); folio_test_clear_young(folio); if (folio_test_active(folio)) folio_set_workingset(folio); if (pageout) { if (folio_isolate_lru(folio)) { if (folio_test_unevictable(folio)) folio_putback_lru(folio); else list_add(&folio->lru, &folio_list); } } else folio_deactivate(folio); huge_unlock: spin_unlock(ptl); if (pageout) reclaim_pages(&folio_list); return 0; } regular_folio: #endif tlb_change_page_size(tlb, PAGE_SIZE); restart: start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!start_pte) return 0; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) { nr = 1; ptent = ptep_get(pte); if (++batch_count == SWAP_CLUSTER_MAX) { batch_count = 0; if (need_resched()) { arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); cond_resched(); goto restart; } } if (pte_none(ptent)) continue; if (!pte_present(ptent)) continue; folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; /* * If we encounter a large folio, only split it if it is not * fully mapped within the range we are operating on. Otherwise * leave it as is so that it can be swapped out whole. If we * fail to split a folio, leave it in place and advance to the * next pte in the range. */ if (folio_test_large(folio)) { bool any_young; nr = madvise_folio_pte_batch(addr, end, folio, pte, ptent, &any_young, NULL); if (any_young) ptent = pte_mkyoung(ptent); if (nr < folio_nr_pages(folio)) { int err; if (folio_likely_mapped_shared(folio)) continue; if (pageout_anon_only_filter && !folio_test_anon(folio)) continue; if (!folio_trylock(folio)) continue; folio_get(folio); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); start_pte = NULL; err = split_folio(folio); folio_unlock(folio); folio_put(folio); start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) break; arch_enter_lazy_mmu_mode(); if (!err) nr = 0; continue; } } /* * Do not interfere with other mappings of this folio and * non-LRU folio. If we have a large folio at this point, we * know it is fully mapped so if its mapcount is the same as its * number of pages, it must be exclusive. */ if (!folio_test_lru(folio) || folio_mapcount(folio) != folio_nr_pages(folio)) continue; if (pageout_anon_only_filter && !folio_test_anon(folio)) continue; if (!pageout && pte_young(ptent)) { clear_young_dirty_ptes(vma, addr, pte, nr, CYDP_CLEAR_YOUNG); tlb_remove_tlb_entries(tlb, pte, nr, addr); } /* * We are deactivating a folio for accelerating reclaiming. * VM couldn't reclaim the folio unless we clear PG_young. * As a side effect, it makes confuse idle-page tracking * because they will miss recent referenced history. */ folio_clear_referenced(folio); folio_test_clear_young(folio); if (folio_test_active(folio)) folio_set_workingset(folio); if (pageout) { if (folio_isolate_lru(folio)) { if (folio_test_unevictable(folio)) folio_putback_lru(folio); else list_add(&folio->lru, &folio_list); } } else folio_deactivate(folio); } if (start_pte) { arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); } if (pageout) reclaim_pages(&folio_list); cond_resched(); return 0; } static const struct mm_walk_ops cold_walk_ops = { .pmd_entry = madvise_cold_or_pageout_pte_range, .walk_lock = PGWALK_RDLOCK, }; static void madvise_cold_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = false, .tlb = tlb, }; tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); } static inline bool can_madv_lru_vma(struct vm_area_struct *vma) { return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB)); } static long madvise_cold(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; lru_add_drain(); tlb_gather_mmu(&tlb, mm); madvise_cold_page_range(&tlb, vma, start_addr, end_addr); tlb_finish_mmu(&tlb); return 0; } static void madvise_pageout_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct madvise_walk_private walk_private = { .pageout = true, .tlb = tlb, }; tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); } static long madvise_pageout(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; /* * If the VMA belongs to a private file mapping, there can be private * dirty pages which can be paged out if even this process is neither * owner nor write capable of the file. We allow private file mappings * further to pageout dirty anon pages. */ if (!vma_is_anonymous(vma) && (!can_do_file_pageout(vma) && (vma->vm_flags & VM_MAYSHARE))) return 0; lru_add_drain(); tlb_gather_mmu(&tlb, mm); madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); tlb_finish_mmu(&tlb); return 0; } static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { const cydp_t cydp_flags = CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY; struct mmu_gather *tlb = walk->private; struct mm_struct *mm = tlb->mm; struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *start_pte, *pte, ptent; struct folio *folio; int nr_swap = 0; unsigned long next; int nr, max_nr; next = pmd_addr_end(addr, end); if (pmd_trans_huge(*pmd)) if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next)) return 0; tlb_change_page_size(tlb, PAGE_SIZE); start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) return 0; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) { nr = 1; ptent = ptep_get(pte); if (pte_none(ptent)) continue; /* * If the pte has swp_entry, just clear page table to * prevent swap-in which is more expensive rather than * (page allocation + zeroing). */ if (!pte_present(ptent)) { swp_entry_t entry; entry = pte_to_swp_entry(ptent); if (!non_swap_entry(entry)) { max_nr = (end - addr) / PAGE_SIZE; nr = swap_pte_batch(pte, max_nr, ptent); nr_swap -= nr; free_swap_and_cache_nr(entry, nr); clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm); } else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) { pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } continue; } folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; /* * If we encounter a large folio, only split it if it is not * fully mapped within the range we are operating on. Otherwise * leave it as is so that it can be marked as lazyfree. If we * fail to split a folio, leave it in place and advance to the * next pte in the range. */ if (folio_test_large(folio)) { bool any_young, any_dirty; nr = madvise_folio_pte_batch(addr, end, folio, pte, ptent, &any_young, &any_dirty); if (nr < folio_nr_pages(folio)) { int err; if (folio_likely_mapped_shared(folio)) continue; if (!folio_trylock(folio)) continue; folio_get(folio); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); start_pte = NULL; err = split_folio(folio); folio_unlock(folio); folio_put(folio); pte = pte_offset_map_lock(mm, pmd, addr, &ptl); start_pte = pte; if (!start_pte) break; arch_enter_lazy_mmu_mode(); if (!err) nr = 0; continue; } if (any_young) ptent = pte_mkyoung(ptent); if (any_dirty) ptent = pte_mkdirty(ptent); } if (folio_test_swapcache(folio) || folio_test_dirty(folio)) { if (!folio_trylock(folio)) continue; /* * If we have a large folio at this point, we know it is * fully mapped so if its mapcount is the same as its * number of pages, it must be exclusive. */ if (folio_mapcount(folio) != folio_nr_pages(folio)) { folio_unlock(folio); continue; } if (folio_test_swapcache(folio) && !folio_free_swap(folio)) { folio_unlock(folio); continue; } folio_clear_dirty(folio); folio_unlock(folio); } if (pte_young(ptent) || pte_dirty(ptent)) { clear_young_dirty_ptes(vma, addr, pte, nr, cydp_flags); tlb_remove_tlb_entries(tlb, pte, nr, addr); } folio_mark_lazyfree(folio); } if (nr_swap) add_mm_counter(mm, MM_SWAPENTS, nr_swap); if (start_pte) { arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); } cond_resched(); return 0; } static const struct mm_walk_ops madvise_free_walk_ops = { .pmd_entry = madvise_free_pte_range, .walk_lock = PGWALK_RDLOCK, }; static int madvise_free_single_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) { struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; struct mmu_gather tlb; /* MADV_FREE works for only anon vma at the moment */ if (!vma_is_anonymous(vma)) return -EINVAL; range.start = max(vma->vm_start, start_addr); if (range.start >= vma->vm_end) return -EINVAL; range.end = min(vma->vm_end, end_addr); if (range.end <= vma->vm_start) return -EINVAL; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, range.start, range.end); lru_add_drain(); tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); tlb_start_vma(&tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, &madvise_free_walk_ops, &tlb); tlb_end_vma(&tlb, vma); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); return 0; } /* * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The * zap_page_range_single call sets things up for shrink_active_list to actually * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * * NB: This interface discards data rather than pushes it out to swap, * as some implementations do. This has performance implications for * applications like large transactional databases which want to discard * pages in anonymous maps after committing to backing store the data * that was kept in them. There is no reason to write this data out to * the swap area if the application is discarding it. * * An interface that causes the system to free clean pages and flush * dirty pages is already available as msync(MS_INVALIDATE). */ static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { zap_page_range_single(vma, start, end - start, NULL); return 0; } static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma, unsigned long start, unsigned long *end, int behavior) { if (!is_vm_hugetlb_page(vma)) { unsigned int forbidden = VM_PFNMAP; if (behavior != MADV_DONTNEED_LOCKED) forbidden |= VM_LOCKED; return !(vma->vm_flags & forbidden); } if (behavior != MADV_DONTNEED && behavior != MADV_DONTNEED_LOCKED) return false; if (start & ~huge_page_mask(hstate_vma(vma))) return false; /* * Madvise callers expect the length to be rounded up to PAGE_SIZE * boundaries, and may be unaware that this VMA uses huge pages. * Avoid unexpected data loss by rounding down the number of * huge pages freed. */ *end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma))); return true; } static long madvise_dontneed_free(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { struct mm_struct *mm = vma->vm_mm; *prev = vma; if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior)) return -EINVAL; if (start == end) return 0; if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_lock has been dropped, prev is stale */ mmap_read_lock(mm); vma = vma_lookup(mm, start); if (!vma) return -ENOMEM; /* * Potential end adjustment for hugetlb vma is OK as * the check below keeps end within vma. */ if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior)) return -EINVAL; if (end > vma->vm_end) { /* * Don't fail if end > vma->vm_end. If the old * vma was split while the mmap_lock was * released the effect of the concurrent * operation may not cause madvise() to * have an undefined result. There may be an * adjacent next vma that we'll walk * next. userfaultfd_remove() will generate an * UFFD_EVENT_REMOVE repetition on the * end-vma->vm_end range, but the manager can * handle a repetition fine. */ end = vma->vm_end; } VM_WARN_ON(start >= end); } if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) return madvise_dontneed_single_vma(vma, start, end); else if (behavior == MADV_FREE) return madvise_free_single_vma(vma, start, end); else return -EINVAL; } static long madvise_populate(struct mm_struct *mm, unsigned long start, unsigned long end, int behavior) { const bool write = behavior == MADV_POPULATE_WRITE; int locked = 1; long pages; while (start < end) { /* Populate (prefault) page tables readable/writable. */ pages = faultin_page_range(mm, start, end, write, &locked); if (!locked) { mmap_read_lock(mm); locked = 1; } if (pages < 0) { switch (pages) { case -EINTR: return -EINTR; case -EINVAL: /* Incompatible mappings / permissions. */ return -EINVAL; case -EHWPOISON: return -EHWPOISON; case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */ return -EFAULT; default: pr_warn_once("%s: unhandled return value: %ld\n", __func__, pages); fallthrough; case -ENOMEM: /* No VMA or out of memory. */ return -ENOMEM; } } start += pages * PAGE_SIZE; } return 0; } /* * Application wants to free up the pages and associated backing store. * This is effectively punching a hole into the middle of a file. */ static long madvise_remove(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { loff_t offset; int error; struct file *f; struct mm_struct *mm = vma->vm_mm; *prev = NULL; /* tell sys_madvise we drop mmap_lock */ if (vma->vm_flags & VM_LOCKED) return -EINVAL; f = vma->vm_file; if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } if (!vma_is_shared_maywrite(vma)) return -EACCES; offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); /* * Filesystem's fallocate may need to take i_rwsem. We need to * explicitly grab a reference because the vma (and hence the * vma's reference to the file) can go away as soon as we drop * mmap_lock. */ get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_lock was not released by userfaultfd_remove() */ mmap_read_unlock(mm); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); mmap_read_lock(mm); return error; } static bool is_valid_guard_vma(struct vm_area_struct *vma, bool allow_locked) { vm_flags_t disallowed = VM_SPECIAL | VM_HUGETLB; /* * A user could lock after setting a guard range but that's fine, as * they'd not be able to fault in. The issue arises when we try to zap * existing locked VMAs. We don't want to do that. */ if (!allow_locked) disallowed |= VM_LOCKED; if (!vma_is_anonymous(vma)) return false; if ((vma->vm_flags & (VM_MAYWRITE | disallowed)) != VM_MAYWRITE) return false; return true; } static bool is_guard_pte_marker(pte_t ptent) { return is_pte_marker(ptent) && is_guard_swp_entry(pte_to_swp_entry(ptent)); } static int guard_install_pud_entry(pud_t *pud, unsigned long addr, unsigned long next, struct mm_walk *walk) { pud_t pudval = pudp_get(pud); /* If huge return >0 so we abort the operation + zap. */ return pud_trans_huge(pudval) || pud_devmap(pudval); } static int guard_install_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { pmd_t pmdval = pmdp_get(pmd); /* If huge return >0 so we abort the operation + zap. */ return pmd_trans_huge(pmdval) || pmd_devmap(pmdval); } static int guard_install_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { pte_t pteval = ptep_get(pte); unsigned long *nr_pages = (unsigned long *)walk->private; /* If there is already a guard page marker, we have nothing to do. */ if (is_guard_pte_marker(pteval)) { (*nr_pages)++; return 0; } /* If populated return >0 so we abort the operation + zap. */ return 1; } static int guard_install_set_pte(unsigned long addr, unsigned long next, pte_t *ptep, struct mm_walk *walk) { unsigned long *nr_pages = (unsigned long *)walk->private; /* Simply install a PTE marker, this causes segfault on access. */ *ptep = make_pte_marker(PTE_MARKER_GUARD); (*nr_pages)++; return 0; } static const struct mm_walk_ops guard_install_walk_ops = { .pud_entry = guard_install_pud_entry, .pmd_entry = guard_install_pmd_entry, .pte_entry = guard_install_pte_entry, .install_pte = guard_install_set_pte, .walk_lock = PGWALK_RDLOCK, }; static long madvise_guard_install(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { long err; int i; *prev = vma; if (!is_valid_guard_vma(vma, /* allow_locked = */false)) return -EINVAL; /* * If we install guard markers, then the range is no longer * empty from a page table perspective and therefore it's * appropriate to have an anon_vma. * * This ensures that on fork, we copy page tables correctly. */ err = anon_vma_prepare(vma); if (err) return err; /* * Optimistically try to install the guard marker pages first. If any * non-guard pages are encountered, give up and zap the range before * trying again. * * We try a few times before giving up and releasing back to userland to * loop around, releasing locks in the process to avoid contention. This * would only happen if there was a great many racing page faults. * * In most cases we should simply install the guard markers immediately * with no zap or looping. */ for (i = 0; i < MAX_MADVISE_GUARD_RETRIES; i++) { unsigned long nr_pages = 0; /* Returns < 0 on error, == 0 if success, > 0 if zap needed. */ err = walk_page_range_mm(vma->vm_mm, start, end, &guard_install_walk_ops, &nr_pages); if (err < 0) return err; if (err == 0) { unsigned long nr_expected_pages = PHYS_PFN(end - start); VM_WARN_ON(nr_pages != nr_expected_pages); return 0; } /* * OK some of the range have non-guard pages mapped, zap * them. This leaves existing guard pages in place. */ zap_page_range_single(vma, start, end - start, NULL); } /* * We were unable to install the guard pages due to being raced by page * faults. This should not happen ordinarily. We return to userspace and * immediately retry, relieving lock contention. */ return restart_syscall(); } static int guard_remove_pud_entry(pud_t *pud, unsigned long addr, unsigned long next, struct mm_walk *walk) { pud_t pudval = pudp_get(pud); /* If huge, cannot have guard pages present, so no-op - skip. */ if (pud_trans_huge(pudval) || pud_devmap(pudval)) walk->action = ACTION_CONTINUE; return 0; } static int guard_remove_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { pmd_t pmdval = pmdp_get(pmd); /* If huge, cannot have guard pages present, so no-op - skip. */ if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval)) walk->action = ACTION_CONTINUE; return 0; } static int guard_remove_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { pte_t ptent = ptep_get(pte); if (is_guard_pte_marker(ptent)) { /* Simply clear the PTE marker. */ pte_clear_not_present_full(walk->mm, addr, pte, false); update_mmu_cache(walk->vma, addr, pte); } return 0; } static const struct mm_walk_ops guard_remove_walk_ops = { .pud_entry = guard_remove_pud_entry, .pmd_entry = guard_remove_pmd_entry, .pte_entry = guard_remove_pte_entry, .walk_lock = PGWALK_RDLOCK, }; static long madvise_guard_remove(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { *prev = vma; /* * We're ok with removing guards in mlock()'d ranges, as this is a * non-destructive action. */ if (!is_valid_guard_vma(vma, /* allow_locked = */true)) return -EINVAL; return walk_page_range(vma->vm_mm, start, end, &guard_remove_walk_ops, NULL); } /* * Apply an madvise behavior to a region of a vma. madvise_update_vma * will handle splitting a vm area into separate areas, each area with its own * behavior. */ static int madvise_vma_behavior(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long behavior) { int error; struct anon_vma_name *anon_name; unsigned long new_flags = vma->vm_flags; if (unlikely(!can_modify_vma_madv(vma, behavior))) return -EPERM; switch (behavior) { case MADV_REMOVE: return madvise_remove(vma, prev, start, end); case MADV_WILLNEED: return madvise_willneed(vma, prev, start, end); case MADV_COLD: return madvise_cold(vma, prev, start, end); case MADV_PAGEOUT: return madvise_pageout(vma, prev, start, end); case MADV_FREE: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: return madvise_dontneed_free(vma, prev, start, end, behavior); case MADV_NORMAL: new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; break; case MADV_SEQUENTIAL: new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; break; case MADV_RANDOM: new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; break; case MADV_DONTFORK: new_flags |= VM_DONTCOPY; break; case MADV_DOFORK: if (vma->vm_flags & VM_IO) return -EINVAL; new_flags &= ~VM_DONTCOPY; break; case MADV_WIPEONFORK: /* MADV_WIPEONFORK is only supported on anonymous memory. */ if (vma->vm_file || vma->vm_flags & VM_SHARED) return -EINVAL; new_flags |= VM_WIPEONFORK; break; case MADV_KEEPONFORK: if (vma->vm_flags & VM_DROPPABLE) return -EINVAL; new_flags &= ~VM_WIPEONFORK; break; case MADV_DONTDUMP: new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: if ((!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) || (vma->vm_flags & VM_DROPPABLE)) return -EINVAL; new_flags &= ~VM_DONTDUMP; break; case MADV_MERGEABLE: case MADV_UNMERGEABLE: error = ksm_madvise(vma, start, end, behavior, &new_flags); if (error) goto out; break; case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: error = hugepage_madvise(vma, &new_flags, behavior); if (error) goto out; break; case MADV_COLLAPSE: return madvise_collapse(vma, prev, start, end); case MADV_GUARD_INSTALL: return madvise_guard_install(vma, prev, start, end); case MADV_GUARD_REMOVE: return madvise_guard_remove(vma, prev, start, end); } anon_name = anon_vma_name(vma); anon_vma_name_get(anon_name); error = madvise_update_vma(vma, prev, start, end, new_flags, anon_name); anon_vma_name_put(anon_name); out: /* * madvise() returns EAGAIN if kernel resources, such as * slab, are temporarily unavailable. */ if (error == -ENOMEM) error = -EAGAIN; return error; } #ifdef CONFIG_MEMORY_FAILURE /* * Error injection support for memory error handling. */ static int madvise_inject_error(int behavior, unsigned long start, unsigned long end) { unsigned long size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; for (; start < end; start += size) { unsigned long pfn; struct page *page; int ret; ret = get_user_pages_fast(start, 1, 0, &page); if (ret != 1) return ret; pfn = page_to_pfn(page); /* * When soft offlining hugepages, after migrating the page * we dissolve it, therefore in the second loop "page" will * no longer be a compound page. */ size = page_size(compound_head(page)); if (behavior == MADV_SOFT_OFFLINE) { pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n", pfn, start); ret = soft_offline_page(pfn, MF_COUNT_INCREASED); } else { pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", pfn, start); ret = memory_failure(pfn, MF_ACTION_REQUIRED | MF_COUNT_INCREASED | MF_SW_SIMULATED); if (ret == -EOPNOTSUPP) ret = 0; } if (ret) return ret; } return 0; } #endif static bool madvise_behavior_valid(int behavior) { switch (behavior) { case MADV_DOFORK: case MADV_DONTFORK: case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: case MADV_REMOVE: case MADV_WILLNEED: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: case MADV_FREE: case MADV_COLD: case MADV_PAGEOUT: case MADV_POPULATE_READ: case MADV_POPULATE_WRITE: #ifdef CONFIG_KSM case MADV_MERGEABLE: case MADV_UNMERGEABLE: #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: case MADV_COLLAPSE: #endif case MADV_DONTDUMP: case MADV_DODUMP: case MADV_WIPEONFORK: case MADV_KEEPONFORK: case MADV_GUARD_INSTALL: case MADV_GUARD_REMOVE: #ifdef CONFIG_MEMORY_FAILURE case MADV_SOFT_OFFLINE: case MADV_HWPOISON: #endif return true; default: return false; } } /* Can we invoke process_madvise() on a remote mm for the specified behavior? */ static bool process_madvise_remote_valid(int behavior) { switch (behavior) { case MADV_COLD: case MADV_PAGEOUT: case MADV_WILLNEED: case MADV_COLLAPSE: return true; default: return false; } } /* * Walk the vmas in range [start,end), and call the visit function on each one. * The visit function will get start and end parameters that cover the overlap * between the current vma and the original range. Any unmapped regions in the * original range will result in this function returning -ENOMEM while still * calling the visit function on all of the existing vmas in the range. * Must be called with the mmap_lock held for reading or writing. */ static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long arg, int (*visit)(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long arg)) { struct vm_area_struct *vma; struct vm_area_struct *prev; unsigned long tmp; int unmapped_error = 0; /* * If the interval [start,end) covers some unmapped address * ranges, just ignore them, but return -ENOMEM at the end. * - different from the way of handling in mlock etc. */ vma = find_vma_prev(mm, start, &prev); if (vma && start > vma->vm_start) prev = vma; for (;;) { int error; /* Still start < end. */ if (!vma) return -ENOMEM; /* Here start < (end|vma->vm_end). */ if (start < vma->vm_start) { unmapped_error = -ENOMEM; start = vma->vm_start; if (start >= end) break; } /* Here vma->vm_start <= start < (end|vma->vm_end) */ tmp = vma->vm_end; if (end < tmp) tmp = end; /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ error = visit(vma, &prev, start, tmp, arg); if (error) return error; start = tmp; if (prev && start < prev->vm_end) start = prev->vm_end; if (start >= end) break; if (prev) vma = find_vma(mm, prev->vm_end); else /* madvise_remove dropped mmap_lock */ vma = find_vma(mm, start); } return unmapped_error; } #ifdef CONFIG_ANON_VMA_NAME static int madvise_vma_anon_name(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long anon_name) { int error; /* Only anonymous mappings can be named */ if (vma->vm_file && !vma_is_anon_shmem(vma)) return -EBADF; error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, (struct anon_vma_name *)anon_name); /* * madvise() returns EAGAIN if kernel resources, such as * slab, are temporarily unavailable. */ if (error == -ENOMEM) error = -EAGAIN; return error; } int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, unsigned long len_in, struct anon_vma_name *anon_name) { unsigned long end; unsigned long len; if (start & ~PAGE_MASK) return -EINVAL; len = (len_in + ~PAGE_MASK) & PAGE_MASK; /* Check to see whether len was rounded up from small -ve to zero */ if (len_in && !len) return -EINVAL; end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, madvise_vma_anon_name); } #endif /* CONFIG_ANON_VMA_NAME */ /* * The madvise(2) system call. * * Applications can use madvise() to advise the kernel how it should * handle paging I/O in this VM area. The idea is to help the kernel * use appropriate read-ahead and caching techniques. The information * provided is advisory only, and can be safely disregarded by the * kernel without affecting the correct operation of the application. * * behavior values: * MADV_NORMAL - the default behavior is to read clusters. This * results in some read-ahead and read-behind. * MADV_RANDOM - the system should read the minimum amount of data * on any access, since it is unlikely that the appli- * cation will need more than what it asks for. * MADV_SEQUENTIAL - pages in the given range will probably be accessed * once, so they can be aggressively read ahead, and * can be freed soon after they are accessed. * MADV_WILLNEED - the application is notifying the system to read * some pages ahead. * MADV_DONTNEED - the application is finished with the given range, * so the kernel can free resources associated with it. * MADV_FREE - the application marks pages in the given range as lazy free, * where actual purges are postponed until memory pressure happens. * MADV_REMOVE - the application wants to free up the given range of * pages and associated backing store. * MADV_DONTFORK - omit this area from child's address space when forking: * typically, to avoid COWing pages pinned by get_user_pages(). * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. * MADV_WIPEONFORK - present the child process with zero-filled memory in this * range after a fork. * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK * MADV_HWPOISON - trigger memory error handler as if the given memory range * were corrupted by unrecoverable hardware memory failure. * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. * MADV_MERGEABLE - the application recommends that KSM try to merge pages in * this area with pages of identical content from other such areas. * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others. * MADV_HUGEPAGE - the application wants to back the given range by transparent * huge pages in the future. Existing pages might be coalesced and * new pages might be allocated as THP. * MADV_NOHUGEPAGE - mark the given range as not worth being backed by * transparent huge pages so the existing pages will not be * coalesced into THP and new pages will not be allocated as THP. * MADV_COLLAPSE - synchronously coalesce pages into new THP. * MADV_DONTDUMP - the application wants to prevent pages in the given range * from being included in its core dump. * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. * MADV_COLD - the application is not expected to use this memory soon, * deactivate pages in this range so that they can be reclaimed * easily if memory pressure happens. * MADV_PAGEOUT - the application is not expected to use this memory soon, * page out the pages in this range immediately. * MADV_POPULATE_READ - populate (prefault) page tables readable by * triggering read faults if required * MADV_POPULATE_WRITE - populate (prefault) page tables writable by * triggering write faults if required * * return values: * zero - success * -EINVAL - start + len < 0, start is not page-aligned, * "behavior" is not a valid value, or application * is attempting to release locked or shared pages, * or the specified address range includes file, Huge TLB, * MAP_SHARED or VMPFNMAP range. * -ENOMEM - addresses in the specified range are not currently * mapped, or are outside the AS of the process. * -EIO - an I/O error occurred while paging in data. * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. * -EPERM - memory is sealed. */ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { unsigned long end; int error; int write; size_t len; struct blk_plug plug; if (!madvise_behavior_valid(behavior)) return -EINVAL; if (!PAGE_ALIGNED(start)) return -EINVAL; len = PAGE_ALIGN(len_in); /* Check to see whether len was rounded up from small -ve to zero */ if (len_in && !len) return -EINVAL; end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; #ifdef CONFIG_MEMORY_FAILURE if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) return madvise_inject_error(behavior, start, start + len_in); #endif write = madvise_need_mmap_write(behavior); if (write) { if (mmap_write_lock_killable(mm)) return -EINTR; } else { mmap_read_lock(mm); } start = untagged_addr_remote(mm, start); end = start + len; blk_start_plug(&plug); switch (behavior) { case MADV_POPULATE_READ: case MADV_POPULATE_WRITE: error = madvise_populate(mm, start, end, behavior); break; default: error = madvise_walk_vmas(mm, start, end, behavior, madvise_vma_behavior); break; } blk_finish_plug(&plug); if (write) mmap_write_unlock(mm); else mmap_read_unlock(mm); return error; } SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { return do_madvise(current->mm, start, len_in, behavior); } /* Perform an madvise operation over a vector of addresses and lengths. */ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, int behavior) { ssize_t ret = 0; size_t total_len; total_len = iov_iter_count(iter); while (iov_iter_count(iter)) { ret = do_madvise(mm, (unsigned long)iter_iov_addr(iter), iter_iov_len(iter), behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat * the operation in aggregate, and would be surprising to the * user. * * As we have already dropped locks, it is safe to just loop and * try again. We check for fatal signals in case we need exit * early anyway. */ if (ret == -ERESTARTNOINTR) { if (fatal_signal_pending(current)) { ret = -EINTR; break; } continue; } if (ret < 0) break; iov_iter_advance(iter, iter_iov_len(iter)); } ret = (total_len - iov_iter_count(iter)) ? : ret; return ret; } SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, size_t, vlen, int, behavior, unsigned int, flags) { ssize_t ret; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; struct task_struct *task; struct mm_struct *mm; unsigned int f_flags; if (flags != 0) { ret = -EINVAL; goto out; } ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) goto out; task = pidfd_get_task(pidfd, &f_flags); if (IS_ERR(task)) { ret = PTR_ERR(task); goto free_iov; } /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR(mm)) { ret = PTR_ERR(mm); goto release_task; } /* * We need only perform this check if we are attempting to manipulate a * remote process's address space. */ if (mm != current->mm && !process_madvise_remote_valid(behavior)) { ret = -EINVAL; goto release_mm; } /* * Require CAP_SYS_NICE for influencing process performance. Note that * only non-destructive hints are currently supported for remote * processes. */ if (mm != current->mm && !capable(CAP_SYS_NICE)) { ret = -EPERM; goto release_mm; } ret = vector_madvise(mm, &iter, behavior); release_mm: mmput(mm); release_task: put_task_struct(task); free_iov: kfree(iov); out: return ret; }
4 4 4 14 14 14 4 4 14 14 14 14 14 4 4 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 // SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * * Copyright (C) 2011 Texas Instruments, Inc. * Copyright (C) 2014 Marvell International Ltd. * * Written by Ilan Elias <ilane@ti.com> * * Acknowledgements: * This file is based on hci_core.c, which was written * by Maxim Krasnyansky. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/kcov.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> struct core_conn_create_data { int length; struct nci_core_conn_create_cmd *cmd; }; static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id) { struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->conn_id == conn_id) return conn_info; } return NULL; } int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, const struct dest_spec_params *params) { const struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->dest_type == dest_type) { if (!params) return conn_info->conn_id; if (params->id == conn_info->dest_params->id && params->protocol == conn_info->dest_params->protocol) return conn_info->conn_id; } } return -EINVAL; } EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params); /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = result; ndev->req_status = NCI_REQ_DONE; complete(&ndev->req_completion); } } EXPORT_SYMBOL(nci_req_complete); static void nci_req_cancel(struct nci_dev *ndev, int err) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = err; ndev->req_status = NCI_REQ_CANCELED; complete(&ndev->req_completion); } } /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc = 0; long completion_rc; ndev->req_status = NCI_REQ_PEND; reinit_completion(&ndev->req_completion); req(ndev, opt); completion_rc = wait_for_completion_interruptible_timeout(&ndev->req_completion, timeout); pr_debug("wait_for_completion return %ld\n", completion_rc); if (completion_rc > 0) { switch (ndev->req_status) { case NCI_REQ_DONE: rc = nci_to_errno(ndev->req_result); break; case NCI_REQ_CANCELED: rc = -ndev->req_result; break; default: rc = -ETIMEDOUT; break; } } else { pr_err("wait_for_completion_interruptible_timeout failed %ld\n", completion_rc); rc = ((completion_rc == 0) ? (-ETIMEDOUT) : (completion_rc)); } ndev->req_status = ndev->req_result = 0; return rc; } inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc; /* Serialize all requests */ mutex_lock(&ndev->req_lock); /* check the state after obtaing the lock against any races * from nci_close_device when the device gets removed. */ if (test_bit(NCI_UP, &ndev->flags)) rc = __nci_request(ndev, req, opt, timeout); else rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; } static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; cmd.reset_type = NCI_RESET_TYPE_RESET_CONFIG; nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; __u8 *num = &cmd.num_mapping_configs; int i; /* set rf mapping configurations */ *num = 0; /* by default mapping is set to NCI_RF_INTERFACE_FRAME */ for (i = 0; i < ndev->num_supported_rf_interfaces; i++) { if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_ISO_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_ISO_DEP; (*num)++; } else if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_NFC_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_NFC_DEP; (*num)++; } if (*num == NCI_MAX_NUM_MAPPING_CONFIGS) break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_MAP_CMD, (1 + ((*num) * sizeof(struct disc_map_config))), &cmd); } struct nci_set_config_param { __u8 id; size_t len; const __u8 *val; }; static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); cmd.num_params = 1; cmd.param.id = param->id; cmd.param.len = param->len; memcpy(cmd.param.val, param->val, param->len); nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd); } struct nci_rf_discover_param { __u32 im_protocols; __u32 tm_protocols; }; static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_JEWEL_MASK || param->im_protocols & NFC_PROTO_MIFARE_MASK || param->im_protocols & NFC_PROTO_ISO14443_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO14443_B_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_B_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_FELICA_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO15693_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_V_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS - 1) && (param->tm_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_CMD, (1 + (cmd.num_disc_configs * sizeof(struct disc_config))), &cmd); } struct nci_rf_discover_select_param { __u8 rf_discovery_id; __u8 rf_protocol; }; static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; cmd.rf_protocol = param->rf_protocol; switch (cmd.rf_protocol) { case NCI_RF_PROTOCOL_ISO_DEP: cmd.rf_interface = NCI_RF_INTERFACE_ISO_DEP; break; case NCI_RF_PROTOCOL_NFC_DEP: cmd.rf_interface = NCI_RF_INTERFACE_NFC_DEP; break; default: cmd.rf_interface = NCI_RF_INTERFACE_FRAME; break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_SELECT_CMD, sizeof(struct nci_rf_discover_select_cmd), &cmd); } static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); } struct nci_cmd_param { __u16 opcode; size_t len; const __u8 *payload; }; static void nci_generic_req(struct nci_dev *ndev, const void *opt) { const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = opcode; param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); struct nci_loopback_data { u8 conn_id; struct sk_buff *data; }; static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) { nci_req_complete(ndev, NCI_STATUS_REJECTED); return; } conn_info->rx_skb = skb; nci_req_complete(ndev, NCI_STATUS_OK); } int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp) { int r; struct nci_loopback_data loopback_data; struct nci_conn_info *conn_info; struct sk_buff *skb; int conn_id = nci_get_conn_info_by_dest_type_params(ndev, NCI_DESTINATION_NFCC_LOOPBACK, NULL); if (conn_id < 0) { r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK, 0, 0, NULL); if (r != NCI_STATUS_OK) return r; conn_id = nci_get_conn_info_by_dest_type_params(ndev, NCI_DESTINATION_NFCC_LOOPBACK, NULL); } conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) return -EPROTO; /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = nci_nfcc_loopback_cb; conn_info->data_exchange_cb_context = ndev; skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE); skb_put_data(skb, data, data_len); loopback_data.conn_id = conn_id; loopback_data.data = skb; ndev->cur_conn_id = conn_id; r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; return r; } EXPORT_SYMBOL(nci_nfcc_loopback); static int nci_open_device(struct nci_dev *ndev) { int rc = 0; mutex_lock(&ndev->req_lock); if (test_bit(NCI_UNREG, &ndev->flags)) { rc = -ENODEV; goto done; } if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; } if (ndev->ops->open(ndev)) { rc = -EIO; goto done; } atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); if (ndev->ops->init) rc = ndev->ops->init(ndev); if (!rc) { rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } if (!rc && ndev->ops->setup) { rc = ndev->ops->setup(ndev); } if (!rc) { struct nci_core_init_v2_cmd nci_init_v2_cmd = { .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } if (!rc && ndev->ops->post_setup) rc = ndev->ops->post_setup(ndev); if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } clear_bit(NCI_INIT, &ndev->flags); if (!rc) { set_bit(NCI_UP, &ndev->flags); nci_clear_target_list(ndev); atomic_set(&ndev->state, NCI_IDLE); } else { /* Init failed, cleanup */ skb_queue_purge(&ndev->cmd_q); skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); ndev->flags &= BIT(NCI_UNREG); } done: mutex_unlock(&ndev->req_lock); return rc; } static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); /* This mutex needs to be held as a barrier for * caller nci_unregister_device */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { /* Need to flush the cmd wq in case * there is a queued/running cmd_work */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); return 0; } /* Drop RX and TX queues */ skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); /* Flush RX and TX wq */ flush_workqueue(ndev->rx_wq); flush_workqueue(ndev->tx_wq); /* Reset device */ skb_queue_purge(&ndev->cmd_q); atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty * and no works are scheduled. */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); return 0; } /* NCI command timer function */ static void nci_cmd_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, cmd_timer); atomic_set(&ndev->cmd_cnt, 1); queue_work(ndev->cmd_wq, &ndev->cmd_work); } /* NCI data exchange timer function */ static void nci_data_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, data_timer); set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); queue_work(ndev->rx_wq, &ndev->rx_work); } static int nci_dev_up(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_open_device(ndev); } static int nci_dev_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_close_device(ndev); } int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) { struct nci_set_config_param param; if (!val || !len) return 0; param.id = id; param.len = len; param.val = val; return __nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; __u8 action = (unsigned long)opt; cmd.discovery_action = action; nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); } int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { unsigned long opt = action; return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); } int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) { struct nci_nfcee_mode_set_cmd cmd; cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, const struct core_conn_create_dest_spec_params *params) { int r; struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; data.cmd = cmd; if (params) { memcpy(cmd->params, params, params_len); if (params->length > 0) memcpy(&ndev->cur_params, &params->value[DEST_SPEC_PARAMS_ID_INDEX], sizeof(struct dest_spec_params)); else ndev->cur_params.id = 0; } else { ndev->cur_params.id = 0; } ndev->cur_dest_type = destination_type; r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { unsigned long opt = conn_id; ndev->cur_conn_id = conn_id; return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); static void nci_set_target_ats(struct nfc_target *target, struct nci_dev *ndev) { if (ndev->target_ats_len > 0) { target->ats_len = ndev->target_ats_len; memcpy(target->ats, ndev->target_ats, target->ats_len); } } static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_set_config_param param; int rc; param.val = nfc_get_local_general_bytes(nfc_dev, &param.len); if ((param.val == NULL) || (param.len == 0)) return 0; if (param.len > NFC_MAX_GT_LEN) return -EINVAL; param.id = NCI_PN_ATR_REQ_GEN_BYTES; rc = nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; return nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } static int nci_set_listen_parameters(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; __u8 val; val = NCI_LA_SEL_INFO_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LA_SEL_INFO, 1, &val); if (rc) return rc; val = NCI_LF_PROTOCOL_TYPE_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LF_PROTOCOL_TYPE, 1, &val); if (rc) return rc; val = NCI_LF_CON_BITR_F_212 | NCI_LF_CON_BITR_F_424; return nci_set_config(ndev, NCI_LF_CON_BITR_F, 1, &val); } static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_param param; int rc; if ((atomic_read(&ndev->state) == NCI_DISCOVERY) || (atomic_read(&ndev->state) == NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to start poll, since poll is already active\n"); return -EBUSY; } if (ndev->target_active_prot) { pr_err("there is an active target\n"); return -EBUSY; } if ((atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) || (atomic_read(&ndev->state) == NCI_POLL_ACTIVE)) { pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; } if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_local_general_bytes(nfc_dev); if (rc) { pr_err("failed to set local general bytes\n"); return rc; } } if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_listen_parameters(nfc_dev); if (rc) pr_err("failed to set listen parameters\n"); } param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; rc = nci_request(ndev, nci_rf_discover_req, &param, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) ndev->poll_prots = im_protocols; return rc; } static void nci_stop_poll(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if ((atomic_read(&ndev->state) != NCI_DISCOVERY) && (atomic_read(&ndev->state) != NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to stop poll, since poll is not active\n"); return; } nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } static int nci_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_select_param param; const struct nfc_target *nci_target = NULL; int i; int rc = 0; pr_debug("target_idx %d, protocol 0x%x\n", target->idx, protocol); if ((atomic_read(&ndev->state) != NCI_W4_HOST_SELECT) && (atomic_read(&ndev->state) != NCI_POLL_ACTIVE)) { pr_err("there is no available target to activate\n"); return -EINVAL; } if (ndev->target_active_prot) { pr_err("there is already an active target\n"); return -EBUSY; } for (i = 0; i < ndev->n_targets; i++) { if (ndev->targets[i].idx == target->idx) { nci_target = &ndev->targets[i]; break; } } if (!nci_target) { pr_err("unable to find the selected target\n"); return -EINVAL; } if (protocol >= NFC_PROTO_MAX) { pr_err("the requested nfc protocol is invalid\n"); return -EINVAL; } if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); return -EINVAL; } if (atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) { param.rf_discovery_id = nci_target->logical_idx; if (protocol == NFC_PROTO_JEWEL) param.rf_protocol = NCI_RF_PROTOCOL_T1T; else if (protocol == NFC_PROTO_MIFARE) param.rf_protocol = NCI_RF_PROTOCOL_T2T; else if (protocol == NFC_PROTO_FELICA) param.rf_protocol = NCI_RF_PROTOCOL_T3T; else if (protocol == NFC_PROTO_ISO14443 || protocol == NFC_PROTO_ISO14443_B) param.rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; rc = nci_request(ndev, nci_rf_discover_select_req, &param, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } if (!rc) { ndev->target_active_prot = protocol; if (protocol == NFC_PROTO_ISO14443) nci_set_target_ats(target, ndev); } return rc; } static void nci_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; } ndev->target_active_prot = 0; switch (mode) { case NFC_TARGET_MODE_SLEEP: nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE; break; } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode); rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP); if (rc) return rc; rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb, ndev->remote_gb_len); if (!rc) rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE, NFC_RF_INITIATOR); return rc; } static int nci_dep_link_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { nci_request(ndev, nci_rf_deactivate_req, (void *)0, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); if (rc) pr_err("error when signaling tm deactivation\n"); } return 0; } static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; if (!conn_info) return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); if (!ndev->target_active_prot) { pr_err("unable to exchange data, no active target\n"); return -EINVAL; } if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) return -EBUSY; /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = cb; conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); return rc; } static int nci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) pr_err("unable to send data\n"); return rc; } static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->enable_se) return ndev->ops->enable_se(ndev, se_idx); return 0; } static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->disable_se) return ndev->ops->disable_se(ndev, se_idx); return 0; } static int nci_discover_se(struct nfc_dev *nfc_dev) { int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->discover_se) { r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); if (r != NCI_STATUS_OK) return -EPROTO; return ndev->ops->discover_se(ndev); } return 0; } static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->se_io) return ndev->ops->se_io(ndev, se_idx, apdu, apdu_length, cb, cb_context); return 0; } static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (!ndev->ops->fw_download) return -ENOTSUPP; return ndev->ops->fw_download(ndev, firmware_name); } static const struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, .stop_poll = nci_stop_poll, .dep_link_up = nci_dep_link_up, .dep_link_down = nci_dep_link_down, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, .tm_send = nci_tm_send, .enable_se = nci_enable_se, .disable_se = nci_disable_se, .discover_se = nci_discover_se, .se_io = nci_se_io, .fw_download = nci_fw_download, }; /* ---- Interface to NCI drivers ---- */ /** * nci_allocate_device - allocate a new nci device * * @ops: device operations * @supported_protocols: NFC protocols supported by the device * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; pr_debug("supported_protocols 0x%x\n", supported_protocols); if (!ops->open || !ops->close || !ops->send) return NULL; if (!supported_protocols) return NULL; ndev = kzalloc(sizeof(struct nci_dev), GFP_KERNEL); if (!ndev) return NULL; ndev->ops = ops; if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); goto free_nci; } ndev->tx_headroom = tx_headroom; ndev->tx_tailroom = tx_tailroom; init_completion(&ndev->req_completion); ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) goto free_nci; ndev->hci_dev = nci_hci_allocate(ndev); if (!ndev->hci_dev) goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; free_nfc: nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; } EXPORT_SYMBOL(nci_allocate_device); /** * nci_free_device - deallocate nci device * * @ndev: The nci device to deallocate */ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); /* drop partial rx data packet if present */ if (ndev->rx_data_reassembly) kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); /** * nci_register_device - register a nci device in the nfc subsystem * * @ndev: The nci device to register */ int nci_register_device(struct nci_dev *ndev) { int rc; struct device *dev = &ndev->nfc_dev->dev; char name[32]; ndev->flags = 0; INIT_WORK(&ndev->cmd_work, nci_cmd_work); snprintf(name, sizeof(name), "%s_nci_cmd_wq", dev_name(dev)); ndev->cmd_wq = create_singlethread_workqueue(name); if (!ndev->cmd_wq) { rc = -ENOMEM; goto exit; } INIT_WORK(&ndev->rx_work, nci_rx_work); snprintf(name, sizeof(name), "%s_nci_rx_wq", dev_name(dev)); ndev->rx_wq = create_singlethread_workqueue(name); if (!ndev->rx_wq) { rc = -ENOMEM; goto destroy_cmd_wq_exit; } INIT_WORK(&ndev->tx_work, nci_tx_work); snprintf(name, sizeof(name), "%s_nci_tx_wq", dev_name(dev)); ndev->tx_wq = create_singlethread_workqueue(name); if (!ndev->tx_wq) { rc = -ENOMEM; goto destroy_rx_wq_exit; } skb_queue_head_init(&ndev->cmd_q); skb_queue_head_init(&ndev->rx_q); skb_queue_head_init(&ndev->tx_q); timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0); timer_setup(&ndev->data_timer, nci_data_timer, 0); mutex_init(&ndev->req_lock); INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) goto destroy_tx_wq_exit; goto exit; destroy_tx_wq_exit: destroy_workqueue(ndev->tx_wq); destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); destroy_cmd_wq_exit: destroy_workqueue(ndev->cmd_wq); exit: return rc; } EXPORT_SYMBOL(nci_register_device); /** * nci_unregister_device - unregister a nci device in the nfc subsystem * * @ndev: The nci device to unregister */ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; /* This set_bit is not protected with specialized barrier, * However, it is fine because the mutex_lock(&ndev->req_lock); * in nci_close_device() will help to emit one. */ set_bit(NCI_UNREG, &ndev->flags); nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { list_del(&conn_info->list); /* conn_info is allocated with devm_kzalloc */ } nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * * @ndev: The nci device * @skb: The sk_buff to receive */ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && !test_bit(NCI_INIT, &ndev->flags))) { kfree_skb(skb); return -ENXIO; } /* Queue frame for rx worker thread */ skb_queue_tail(&ndev->rx_q, skb); queue_work(ndev->rx_wq, &ndev->rx_work); return 0; } EXPORT_SYMBOL(nci_recv_frame); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev) { kfree_skb(skb); return -ENODEV; } /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); return ndev->ops->send(ndev, skb); } EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; pr_debug("opcode 0x%x, plen %d\n", opcode, plen); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + plen), GFP_KERNEL); if (!skb) { pr_err("no memory for command\n"); return -ENOMEM; } hdr = skb_put(skb, NCI_CTRL_HDR_SIZE); hdr->gid = nci_opcode_gid(opcode); hdr->oid = nci_opcode_oid(opcode); hdr->plen = plen; nci_mt_set((__u8 *)hdr, NCI_MT_CMD_PKT); nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); if (plen) skb_put_data(skb, payload, plen); skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); return 0; } EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, size_t n_ops, __u16 opcode) { size_t i; const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; for (i = 0; i < n_ops; i++) { op = &ops[i]; if (op->opcode == opcode) return op; } return NULL; } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || !op->rsp) return -ENOTSUPP; return op->rsp(ndev, skb); } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) return -ENOTSUPP; return op->ntf(ndev, skb); } int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } static bool nci_valid_size(struct sk_buff *skb) { BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); unsigned int hdr_size = NCI_CTRL_HDR_SIZE; if (skb->len < hdr_size || !nci_plen(skb->data) || skb->len < hdr_size + nci_plen(skb->data)) { return false; } return true; } /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); struct nci_conn_info *conn_info; struct sk_buff *skb; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) return; pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Check if data flow control is used */ if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), nci_conn_id(skb->data), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); kcov_remote_stop(); } } /* ----- NCI RX worker thread (data & control) ----- */ static void nci_rx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, rx_work); struct sk_buff *skb; for (; (skb = skb_dequeue(&ndev->rx_q)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); if (!nci_valid_size(skb)) { kfree_skb(skb); continue; } /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: nci_rsp_packet(ndev, skb); break; case NCI_MT_NTF_PKT: nci_ntf_packet(ndev, skb); break; case NCI_MT_DATA_PKT: nci_rx_data_packet(ndev, skb); break; default: pr_err("unknown MT 0x%x\n", nci_mt(skb->data)); kfree_skb(skb); break; } } /* check if a data exchange timeout has occurred */ if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id, -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } } /* ----- NCI TX CMD worker thread ----- */ static void nci_cmd_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, cmd_work); struct sk_buff *skb; pr_debug("cmd_cnt %d\n", atomic_read(&ndev->cmd_cnt)); /* Send queued command */ if (atomic_read(&ndev->cmd_cnt)) { skb = skb_dequeue(&ndev->cmd_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); atomic_dec(&ndev->cmd_cnt); pr_debug("NCI TX: MT=cmd, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", nci_pbf(skb->data), nci_opcode_gid(nci_opcode(skb->data)), nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); kcov_remote_stop(); } } MODULE_DESCRIPTION("NFC Controller Interface"); MODULE_LICENSE("GPL");
193 82 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM fuse #if !defined(_TRACE_FUSE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FUSE_H #include <linux/tracepoint.h> #define OPCODES \ EM( FUSE_LOOKUP, "FUSE_LOOKUP") \ EM( FUSE_FORGET, "FUSE_FORGET") \ EM( FUSE_GETATTR, "FUSE_GETATTR") \ EM( FUSE_SETATTR, "FUSE_SETATTR") \ EM( FUSE_READLINK, "FUSE_READLINK") \ EM( FUSE_SYMLINK, "FUSE_SYMLINK") \ EM( FUSE_MKNOD, "FUSE_MKNOD") \ EM( FUSE_MKDIR, "FUSE_MKDIR") \ EM( FUSE_UNLINK, "FUSE_UNLINK") \ EM( FUSE_RMDIR, "FUSE_RMDIR") \ EM( FUSE_RENAME, "FUSE_RENAME") \ EM( FUSE_LINK, "FUSE_LINK") \ EM( FUSE_OPEN, "FUSE_OPEN") \ EM( FUSE_READ, "FUSE_READ") \ EM( FUSE_WRITE, "FUSE_WRITE") \ EM( FUSE_STATFS, "FUSE_STATFS") \ EM( FUSE_RELEASE, "FUSE_RELEASE") \ EM( FUSE_FSYNC, "FUSE_FSYNC") \ EM( FUSE_SETXATTR, "FUSE_SETXATTR") \ EM( FUSE_GETXATTR, "FUSE_GETXATTR") \ EM( FUSE_LISTXATTR, "FUSE_LISTXATTR") \ EM( FUSE_REMOVEXATTR, "FUSE_REMOVEXATTR") \ EM( FUSE_FLUSH, "FUSE_FLUSH") \ EM( FUSE_INIT, "FUSE_INIT") \ EM( FUSE_OPENDIR, "FUSE_OPENDIR") \ EM( FUSE_READDIR, "FUSE_READDIR") \ EM( FUSE_RELEASEDIR, "FUSE_RELEASEDIR") \ EM( FUSE_FSYNCDIR, "FUSE_FSYNCDIR") \ EM( FUSE_GETLK, "FUSE_GETLK") \ EM( FUSE_SETLK, "FUSE_SETLK") \ EM( FUSE_SETLKW, "FUSE_SETLKW") \ EM( FUSE_ACCESS, "FUSE_ACCESS") \ EM( FUSE_CREATE, "FUSE_CREATE") \ EM( FUSE_INTERRUPT, "FUSE_INTERRUPT") \ EM( FUSE_BMAP, "FUSE_BMAP") \ EM( FUSE_DESTROY, "FUSE_DESTROY") \ EM( FUSE_IOCTL, "FUSE_IOCTL") \ EM( FUSE_POLL, "FUSE_POLL") \ EM( FUSE_NOTIFY_REPLY, "FUSE_NOTIFY_REPLY") \ EM( FUSE_BATCH_FORGET, "FUSE_BATCH_FORGET") \ EM( FUSE_FALLOCATE, "FUSE_FALLOCATE") \ EM( FUSE_READDIRPLUS, "FUSE_READDIRPLUS") \ EM( FUSE_RENAME2, "FUSE_RENAME2") \ EM( FUSE_LSEEK, "FUSE_LSEEK") \ EM( FUSE_COPY_FILE_RANGE, "FUSE_COPY_FILE_RANGE") \ EM( FUSE_SETUPMAPPING, "FUSE_SETUPMAPPING") \ EM( FUSE_REMOVEMAPPING, "FUSE_REMOVEMAPPING") \ EM( FUSE_SYNCFS, "FUSE_SYNCFS") \ EM( FUSE_TMPFILE, "FUSE_TMPFILE") \ EM( FUSE_STATX, "FUSE_STATX") \ EMe(CUSE_INIT, "CUSE_INIT") /* * This will turn the above table into TRACE_DEFINE_ENUM() for each of the * entries. */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); OPCODES /* Now we redfine it with the table that __print_symbolic needs. */ #undef EM #undef EMe #define EM(a, b) {a, b}, #define EMe(a, b) {a, b} TRACE_EVENT(fuse_request_send, TP_PROTO(const struct fuse_req *req), TP_ARGS(req), TP_STRUCT__entry( __field(dev_t, connection) __field(uint64_t, unique) __field(enum fuse_opcode, opcode) __field(uint32_t, len) ), TP_fast_assign( __entry->connection = req->fm->fc->dev; __entry->unique = req->in.h.unique; __entry->opcode = req->in.h.opcode; __entry->len = req->in.h.len; ), TP_printk("connection %u req %llu opcode %u (%s) len %u ", __entry->connection, __entry->unique, __entry->opcode, __print_symbolic(__entry->opcode, OPCODES), __entry->len) ); TRACE_EVENT(fuse_request_end, TP_PROTO(const struct fuse_req *req), TP_ARGS(req), TP_STRUCT__entry( __field(dev_t, connection) __field(uint64_t, unique) __field(uint32_t, len) __field(int32_t, error) ), TP_fast_assign( __entry->connection = req->fm->fc->dev; __entry->unique = req->in.h.unique; __entry->len = req->out.h.len; __entry->error = req->out.h.error; ), TP_printk("connection %u req %llu len %u error %d", __entry->connection, __entry->unique, __entry->len, __entry->error) ); #endif /* _TRACE_FUSE_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE fuse_trace #include <trace/define_trace.h>
4 4 2 2 2 2 9 9 9 9 9 9 9 5 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 // SPDX-License-Identifier: GPL-2.0-or-later /**************************************************************** * * kaweth.c - driver for KL5KUSB101 based USB->Ethernet * * (c) 2000 Interlan Communications * (c) 2000 Stephane Alnet * (C) 2001 Brad Hards * (C) 2002 Oliver Neukum * * Original author: The Zapman <zapman@interlan.net> * Inspired by, and much credit goes to Michael Rothwell * <rothwell@interlan.net> for the test equipment, help, and patience * Based off of (and with thanks to) Petko Manolov's pegaus.c driver. * Also many thanks to Joel Silverman and Ed Surprenant at Kawasaki * for providing the firmware and driver resources. * ****************************************************************/ /* TODO: * Develop test procedures for USB net interfaces * Run test procedures * Fix bugs from previous two steps * Snoop other OSs for any tricks we're not doing * Reduce arbitrary timeouts * Smart multicast support * Temporary MAC change support * Tunable SOFs parameter - ioctl()? * Ethernet stats collection * Code formatting improvements */ #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/usb.h> #include <linux/types.h> #include <linux/ethtool.h> #include <linux/dma-mapping.h> #include <linux/wait.h> #include <linux/firmware.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #undef DEBUG #define KAWETH_MTU 1514 #define KAWETH_BUF_SIZE 1664 #define KAWETH_TX_TIMEOUT (5 * HZ) #define KAWETH_SCRATCH_SIZE 32 #define KAWETH_FIRMWARE_BUF_SIZE 4096 #define KAWETH_CONTROL_TIMEOUT (30000) #define KAWETH_STATUS_BROKEN 0x0000001 #define KAWETH_STATUS_CLOSING 0x0000002 #define KAWETH_STATUS_SUSPENDING 0x0000004 #define KAWETH_STATUS_BLOCKED (KAWETH_STATUS_CLOSING | KAWETH_STATUS_SUSPENDING) #define KAWETH_PACKET_FILTER_PROMISCUOUS 0x01 #define KAWETH_PACKET_FILTER_ALL_MULTICAST 0x02 #define KAWETH_PACKET_FILTER_DIRECTED 0x04 #define KAWETH_PACKET_FILTER_BROADCAST 0x08 #define KAWETH_PACKET_FILTER_MULTICAST 0x10 /* Table 7 */ #define KAWETH_COMMAND_GET_ETHERNET_DESC 0x00 #define KAWETH_COMMAND_MULTICAST_FILTERS 0x01 #define KAWETH_COMMAND_SET_PACKET_FILTER 0x02 #define KAWETH_COMMAND_STATISTICS 0x03 #define KAWETH_COMMAND_SET_TEMP_MAC 0x06 #define KAWETH_COMMAND_GET_TEMP_MAC 0x07 #define KAWETH_COMMAND_SET_URB_SIZE 0x08 #define KAWETH_COMMAND_SET_SOFS_WAIT 0x09 #define KAWETH_COMMAND_SCAN 0xFF #define KAWETH_SOFS_TO_WAIT 0x05 #define INTBUFFERSIZE 4 #define STATE_OFFSET 0 #define STATE_MASK 0x40 #define STATE_SHIFT 5 #define IS_BLOCKED(s) (s & KAWETH_STATUS_BLOCKED) MODULE_AUTHOR("Michael Zappe <zapman@interlan.net>, Stephane Alnet <stephane@u-picardie.fr>, Brad Hards <bhards@bigpond.net.au> and Oliver Neukum <oliver@neukum.org>"); MODULE_DESCRIPTION("KL5USB101 USB Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("kaweth/new_code.bin"); MODULE_FIRMWARE("kaweth/new_code_fix.bin"); MODULE_FIRMWARE("kaweth/trigger_code.bin"); MODULE_FIRMWARE("kaweth/trigger_code_fix.bin"); static const char driver_name[] = "kaweth"; static int kaweth_probe( struct usb_interface *intf, const struct usb_device_id *id /* from id_table */ ); static void kaweth_disconnect(struct usb_interface *intf); static int kaweth_suspend(struct usb_interface *intf, pm_message_t message); static int kaweth_resume(struct usb_interface *intf); /**************************************************************** * usb_device_id ****************************************************************/ static const struct usb_device_id usb_klsi_table[] = { { USB_DEVICE(0x03e8, 0x0008) }, /* AOX Endpoints USB Ethernet */ { USB_DEVICE(0x04bb, 0x0901) }, /* I-O DATA USB-ET/T */ { USB_DEVICE(0x0506, 0x03e8) }, /* 3Com 3C19250 */ { USB_DEVICE(0x0506, 0x11f8) }, /* 3Com 3C460 */ { USB_DEVICE(0x0557, 0x2002) }, /* ATEN USB Ethernet */ { USB_DEVICE(0x0557, 0x4000) }, /* D-Link DSB-650C */ { USB_DEVICE(0x0565, 0x0002) }, /* Peracom Enet */ { USB_DEVICE(0x0565, 0x0003) }, /* Optus@Home UEP1045A */ { USB_DEVICE(0x0565, 0x0005) }, /* Peracom Enet2 */ { USB_DEVICE(0x05e9, 0x0008) }, /* KLSI KL5KUSB101B */ { USB_DEVICE(0x05e9, 0x0009) }, /* KLSI KL5KUSB101B (Board change) */ { USB_DEVICE(0x066b, 0x2202) }, /* Linksys USB10T */ { USB_DEVICE(0x06e1, 0x0008) }, /* ADS USB-10BT */ { USB_DEVICE(0x06e1, 0x0009) }, /* ADS USB-10BT */ { USB_DEVICE(0x0707, 0x0100) }, /* SMC 2202USB */ { USB_DEVICE(0x07aa, 0x0001) }, /* Correga K.K. */ { USB_DEVICE(0x07b8, 0x4000) }, /* D-Link DU-E10 */ { USB_DEVICE(0x07c9, 0xb010) }, /* Allied Telesyn AT-USB10 USB Ethernet Adapter */ { USB_DEVICE(0x0846, 0x1001) }, /* NetGear EA-101 */ { USB_DEVICE(0x0846, 0x1002) }, /* NetGear EA-101 */ { USB_DEVICE(0x085a, 0x0008) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x085a, 0x0009) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x087d, 0x5704) }, /* Jaton USB Ethernet Device Adapter */ { USB_DEVICE(0x0951, 0x0008) }, /* Kingston Technology USB Ethernet Adapter */ { USB_DEVICE(0x095a, 0x3003) }, /* Portsmith Express Ethernet Adapter */ { USB_DEVICE(0x10bd, 0x1427) }, /* ASANTE USB To Ethernet Adapter */ { USB_DEVICE(0x1342, 0x0204) }, /* Mobility USB-Ethernet Adapter */ { USB_DEVICE(0x13d2, 0x0400) }, /* Shark Pocket Adapter */ { USB_DEVICE(0x1485, 0x0001) }, /* Silicom U2E */ { USB_DEVICE(0x1485, 0x0002) }, /* Psion Dacom Gold Port Ethernet */ { USB_DEVICE(0x1645, 0x0005) }, /* Entrega E45 */ { USB_DEVICE(0x1645, 0x0008) }, /* Entrega USB Ethernet Adapter */ { USB_DEVICE(0x1645, 0x8005) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x1668, 0x0323) }, /* Actiontec USB Ethernet */ { USB_DEVICE(0x2001, 0x4000) }, /* D-link DSB-650C */ {} /* Null terminator */ }; MODULE_DEVICE_TABLE (usb, usb_klsi_table); /**************************************************************** * kaweth_driver ****************************************************************/ static struct usb_driver kaweth_driver = { .name = driver_name, .probe = kaweth_probe, .disconnect = kaweth_disconnect, .suspend = kaweth_suspend, .resume = kaweth_resume, .id_table = usb_klsi_table, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; typedef __u8 eth_addr_t[6]; /**************************************************************** * usb_eth_dev ****************************************************************/ struct usb_eth_dev { char *name; __u16 vendor; __u16 device; void *pdata; }; /**************************************************************** * kaweth_ethernet_configuration * Refer Table 8 ****************************************************************/ struct kaweth_ethernet_configuration { __u8 size; __u8 reserved1; __u8 reserved2; eth_addr_t hw_addr; __u32 statistics_mask; __le16 segment_size; __u16 max_multicast_filters; __u8 reserved3; } __packed; /**************************************************************** * kaweth_device ****************************************************************/ struct kaweth_device { spinlock_t device_lock; __u32 status; int end; int suspend_lowmem_rx; int suspend_lowmem_ctrl; int linkstate; int opened; struct delayed_work lowmem_work; struct usb_device *dev; struct usb_interface *intf; struct net_device *net; wait_queue_head_t term_wait; struct urb *rx_urb; struct urb *tx_urb; struct urb *irq_urb; dma_addr_t intbufferhandle; __u8 *intbuffer; dma_addr_t rxbufferhandle; __u8 *rx_buf; struct sk_buff *tx_skb; __u8 *firmware_buf; __u8 scratch[KAWETH_SCRATCH_SIZE]; __u16 packet_filter_bitmap; struct kaweth_ethernet_configuration configuration; }; /**************************************************************** * kaweth_read_configuration ****************************************************************/ static int kaweth_read_configuration(struct kaweth_device *kaweth) { return usb_control_msg(kaweth->dev, usb_rcvctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_GET_ETHERNET_DESC, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &kaweth->configuration, sizeof(kaweth->configuration), KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_set_urb_size ****************************************************************/ static int kaweth_set_urb_size(struct kaweth_device *kaweth, __u16 urb_size) { netdev_dbg(kaweth->net, "Setting URB size to %d\n", (unsigned)urb_size); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_URB_SIZE, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, urb_size, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_set_sofs_wait ****************************************************************/ static int kaweth_set_sofs_wait(struct kaweth_device *kaweth, __u16 sofs_wait) { netdev_dbg(kaweth->net, "Set SOFS wait to %d\n", (unsigned)sofs_wait); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_SOFS_WAIT, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, sofs_wait, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_set_receive_filter ****************************************************************/ static int kaweth_set_receive_filter(struct kaweth_device *kaweth, __u16 receive_filter) { netdev_dbg(kaweth->net, "Set receive filter to %d\n", (unsigned)receive_filter); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_PACKET_FILTER, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, receive_filter, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_download_firmware ****************************************************************/ static int kaweth_download_firmware(struct kaweth_device *kaweth, const char *fwname, __u8 interrupt, __u8 type) { const struct firmware *fw; int data_len; int ret; ret = request_firmware(&fw, fwname, &kaweth->dev->dev); if (ret) { dev_err(&kaweth->intf->dev, "Firmware request failed\n"); return ret; } if (fw->size > KAWETH_FIRMWARE_BUF_SIZE) { dev_err(&kaweth->intf->dev, "Firmware too big: %zu\n", fw->size); release_firmware(fw); return -ENOSPC; } data_len = fw->size; memcpy(kaweth->firmware_buf, fw->data, fw->size); release_firmware(fw); kaweth->firmware_buf[2] = (data_len & 0xFF) - 7; kaweth->firmware_buf[3] = data_len >> 8; kaweth->firmware_buf[4] = type; kaweth->firmware_buf[5] = interrupt; netdev_dbg(kaweth->net, "High: %i, Low:%i\n", kaweth->firmware_buf[3], kaweth->firmware_buf[2]); netdev_dbg(kaweth->net, "Downloading firmware at %p to kaweth device at %p\n", kaweth->firmware_buf, kaweth); netdev_dbg(kaweth->net, "Firmware length: %d\n", data_len); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, 0, 0, kaweth->firmware_buf, data_len, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_trigger_firmware ****************************************************************/ static int kaweth_trigger_firmware(struct kaweth_device *kaweth, __u8 interrupt) { kaweth->firmware_buf[0] = 0xB6; kaweth->firmware_buf[1] = 0xC3; kaweth->firmware_buf[2] = 0x01; kaweth->firmware_buf[3] = 0x00; kaweth->firmware_buf[4] = 0x06; kaweth->firmware_buf[5] = interrupt; kaweth->firmware_buf[6] = 0x00; kaweth->firmware_buf[7] = 0x00; return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, 0, 0, (void *)kaweth->firmware_buf, 8, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_reset ****************************************************************/ static int kaweth_reset(struct kaweth_device *kaweth) { int result; result = usb_reset_configuration(kaweth->dev); mdelay(10); netdev_dbg(kaweth->net, "kaweth_reset() returns %d.\n", result); return result; } static void kaweth_usb_receive(struct urb *); static int kaweth_resubmit_rx_urb(struct kaweth_device *, gfp_t); /**************************************************************** int_callback *****************************************************************/ static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, gfp_t mf) { int status; status = usb_submit_urb (kaweth->irq_urb, mf); if (unlikely(status == -ENOMEM)) { kaweth->suspend_lowmem_ctrl = 1; schedule_delayed_work(&kaweth->lowmem_work, HZ/4); } else { kaweth->suspend_lowmem_ctrl = 0; } if (status) dev_err(&kaweth->intf->dev, "can't resubmit intr, %s-%s, status %d\n", kaweth->dev->bus->bus_name, kaweth->dev->devpath, status); } static void int_callback(struct urb *u) { struct kaweth_device *kaweth = u->context; int act_state; int status = u->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: return; /* -EPIPE: should clear the halt */ default: /* error */ goto resubmit; } /* we check the link state to report changes */ if (kaweth->linkstate != (act_state = ( kaweth->intbuffer[STATE_OFFSET] | STATE_MASK) >> STATE_SHIFT)) { if (act_state) netif_carrier_on(kaweth->net); else netif_carrier_off(kaweth->net); kaweth->linkstate = act_state; } resubmit: kaweth_resubmit_int_urb(kaweth, GFP_ATOMIC); } static void kaweth_resubmit_tl(struct work_struct *work) { struct kaweth_device *kaweth = container_of(work, struct kaweth_device, lowmem_work.work); if (IS_BLOCKED(kaweth->status)) return; if (kaweth->suspend_lowmem_rx) kaweth_resubmit_rx_urb(kaweth, GFP_NOIO); if (kaweth->suspend_lowmem_ctrl) kaweth_resubmit_int_urb(kaweth, GFP_NOIO); } /**************************************************************** * kaweth_resubmit_rx_urb ****************************************************************/ static int kaweth_resubmit_rx_urb(struct kaweth_device *kaweth, gfp_t mem_flags) { int result; usb_fill_bulk_urb(kaweth->rx_urb, kaweth->dev, usb_rcvbulkpipe(kaweth->dev, 1), kaweth->rx_buf, KAWETH_BUF_SIZE, kaweth_usb_receive, kaweth); kaweth->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; kaweth->rx_urb->transfer_dma = kaweth->rxbufferhandle; if((result = usb_submit_urb(kaweth->rx_urb, mem_flags))) { if (result == -ENOMEM) { kaweth->suspend_lowmem_rx = 1; schedule_delayed_work(&kaweth->lowmem_work, HZ/4); } dev_err(&kaweth->intf->dev, "resubmitting rx_urb %d failed\n", result); } else { kaweth->suspend_lowmem_rx = 0; } return result; } static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth, bool may_sleep); /**************************************************************** * kaweth_usb_receive ****************************************************************/ static void kaweth_usb_receive(struct urb *urb) { struct device *dev = &urb->dev->dev; struct kaweth_device *kaweth = urb->context; struct net_device *net = kaweth->net; int status = urb->status; unsigned long flags; int count = urb->actual_length; int count2 = urb->transfer_buffer_length; __u16 pkt_len = le16_to_cpup((__le16 *)kaweth->rx_buf); struct sk_buff *skb; if (unlikely(status == -EPIPE)) { net->stats.rx_errors++; kaweth->end = 1; wake_up(&kaweth->term_wait); dev_dbg(dev, "Status was -EPIPE.\n"); return; } if (unlikely(status == -ECONNRESET || status == -ESHUTDOWN)) { /* we are killed - set a flag and wake the disconnect handler */ kaweth->end = 1; wake_up(&kaweth->term_wait); dev_dbg(dev, "Status was -ECONNRESET or -ESHUTDOWN.\n"); return; } if (unlikely(status == -EPROTO || status == -ETIME || status == -EILSEQ)) { net->stats.rx_errors++; dev_dbg(dev, "Status was -EPROTO, -ETIME, or -EILSEQ.\n"); return; } if (unlikely(status == -EOVERFLOW)) { net->stats.rx_errors++; dev_dbg(dev, "Status was -EOVERFLOW.\n"); } spin_lock_irqsave(&kaweth->device_lock, flags); if (IS_BLOCKED(kaweth->status)) { spin_unlock_irqrestore(&kaweth->device_lock, flags); return; } spin_unlock_irqrestore(&kaweth->device_lock, flags); if(status && status != -EREMOTEIO && count != 1) { dev_err(&kaweth->intf->dev, "%s RX status: %d count: %d packet_len: %d\n", net->name, status, count, (int)pkt_len); kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } if(kaweth->net && (count > 2)) { if(pkt_len > (count - 2)) { dev_err(&kaweth->intf->dev, "Packet length too long for USB frame (pkt_len: %x, count: %x)\n", pkt_len, count); dev_err(&kaweth->intf->dev, "Packet len & 2047: %x\n", pkt_len & 2047); dev_err(&kaweth->intf->dev, "Count 2: %x\n", count2); kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } if(!(skb = dev_alloc_skb(pkt_len+2))) { kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, net); netif_rx(skb); net->stats.rx_packets++; net->stats.rx_bytes += pkt_len; } kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); } /**************************************************************** * kaweth_open ****************************************************************/ static int kaweth_open(struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); int res; res = usb_autopm_get_interface(kaweth->intf); if (res) { dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n"); return -EIO; } res = kaweth_resubmit_rx_urb(kaweth, GFP_KERNEL); if (res) goto err_out; usb_fill_int_urb( kaweth->irq_urb, kaweth->dev, usb_rcvintpipe(kaweth->dev, 3), kaweth->intbuffer, INTBUFFERSIZE, int_callback, kaweth, 250); /* overriding the descriptor */ kaweth->irq_urb->transfer_dma = kaweth->intbufferhandle; kaweth->irq_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; res = usb_submit_urb(kaweth->irq_urb, GFP_KERNEL); if (res) { usb_kill_urb(kaweth->rx_urb); goto err_out; } kaweth->opened = 1; netif_start_queue(net); kaweth_async_set_rx_mode(kaweth, true); return 0; err_out: usb_autopm_put_interface(kaweth->intf); return -EIO; } /**************************************************************** * kaweth_kill_urbs ****************************************************************/ static void kaweth_kill_urbs(struct kaweth_device *kaweth) { usb_kill_urb(kaweth->irq_urb); usb_kill_urb(kaweth->rx_urb); usb_kill_urb(kaweth->tx_urb); cancel_delayed_work_sync(&kaweth->lowmem_work); /* a scheduled work may have resubmitted, we hit them again */ usb_kill_urb(kaweth->irq_urb); usb_kill_urb(kaweth->rx_urb); } /**************************************************************** * kaweth_close ****************************************************************/ static int kaweth_close(struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); netif_stop_queue(net); kaweth->opened = 0; kaweth->status |= KAWETH_STATUS_CLOSING; kaweth_kill_urbs(kaweth); kaweth->status &= ~KAWETH_STATUS_CLOSING; usb_autopm_put_interface(kaweth->intf); return 0; } static u32 kaweth_get_link(struct net_device *dev) { struct kaweth_device *kaweth = netdev_priv(dev); return kaweth->linkstate; } static const struct ethtool_ops ops = { .get_link = kaweth_get_link }; /**************************************************************** * kaweth_usb_transmit_complete ****************************************************************/ static void kaweth_usb_transmit_complete(struct urb *urb) { struct kaweth_device *kaweth = urb->context; struct sk_buff *skb = kaweth->tx_skb; int status = urb->status; if (unlikely(status != 0)) if (status != -ENOENT) dev_dbg(&urb->dev->dev, "%s: TX status %d.\n", kaweth->net->name, status); netif_wake_queue(kaweth->net); dev_kfree_skb_irq(skb); } /**************************************************************** * kaweth_start_xmit ****************************************************************/ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __le16 *private_header; int res; spin_lock_irq(&kaweth->device_lock); kaweth_async_set_rx_mode(kaweth, false); netif_stop_queue(net); if (IS_BLOCKED(kaweth->status)) { goto skip; } /* We now decide whether we can put our special header into the sk_buff */ if (skb_cow_head(skb, 2)) { net->stats.tx_errors++; netif_start_queue(net); spin_unlock_irq(&kaweth->device_lock); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } private_header = __skb_push(skb, 2); *private_header = cpu_to_le16(skb->len-2); kaweth->tx_skb = skb; usb_fill_bulk_urb(kaweth->tx_urb, kaweth->dev, usb_sndbulkpipe(kaweth->dev, 2), private_header, skb->len, kaweth_usb_transmit_complete, kaweth); kaweth->end = 0; if((res = usb_submit_urb(kaweth->tx_urb, GFP_ATOMIC))) { dev_warn(&net->dev, "kaweth failed tx_urb %d\n", res); skip: net->stats.tx_errors++; netif_start_queue(net); dev_kfree_skb_irq(skb); } else { net->stats.tx_packets++; net->stats.tx_bytes += skb->len; } spin_unlock_irq(&kaweth->device_lock); return NETDEV_TX_OK; } /**************************************************************** * kaweth_set_rx_mode ****************************************************************/ static void kaweth_set_rx_mode(struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __u16 packet_filter_bitmap = KAWETH_PACKET_FILTER_DIRECTED | KAWETH_PACKET_FILTER_BROADCAST | KAWETH_PACKET_FILTER_MULTICAST; netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap); netif_stop_queue(net); if (net->flags & IFF_PROMISC) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_PROMISCUOUS; } else if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_ALL_MULTICAST; } kaweth->packet_filter_bitmap = packet_filter_bitmap; netif_wake_queue(net); } /**************************************************************** * kaweth_async_set_rx_mode ****************************************************************/ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth, bool may_sleep) { int ret; __u16 packet_filter_bitmap = kaweth->packet_filter_bitmap; kaweth->packet_filter_bitmap = 0; if (packet_filter_bitmap == 0) return; if (!may_sleep) return; ret = usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_PACKET_FILTER, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, packet_filter_bitmap, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); if (ret < 0) dev_err(&kaweth->intf->dev, "Failed to set Rx mode: %d\n", ret); else netdev_dbg(kaweth->net, "Set Rx mode to %d\n", packet_filter_bitmap); } /**************************************************************** * kaweth_tx_timeout ****************************************************************/ static void kaweth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct kaweth_device *kaweth = netdev_priv(net); dev_warn(&net->dev, "%s: Tx timed out. Resetting.\n", net->name); net->stats.tx_errors++; netif_trans_update(net); usb_unlink_urb(kaweth->tx_urb); } /**************************************************************** * kaweth_suspend ****************************************************************/ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message) { struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status |= KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); kaweth_kill_urbs(kaweth); return 0; } /**************************************************************** * kaweth_resume ****************************************************************/ static int kaweth_resume(struct usb_interface *intf) { struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status &= ~KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); if (!kaweth->opened) return 0; kaweth_resubmit_rx_urb(kaweth, GFP_NOIO); kaweth_resubmit_int_urb(kaweth, GFP_NOIO); return 0; } /**************************************************************** * kaweth_probe ****************************************************************/ static const struct net_device_ops kaweth_netdev_ops = { .ndo_open = kaweth_open, .ndo_stop = kaweth_close, .ndo_start_xmit = kaweth_start_xmit, .ndo_tx_timeout = kaweth_tx_timeout, .ndo_set_rx_mode = kaweth_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; static int kaweth_probe( struct usb_interface *intf, const struct usb_device_id *id /* from id_table */ ) { struct device *dev = &intf->dev; struct usb_device *udev = interface_to_usbdev(intf); struct kaweth_device *kaweth; struct net_device *netdev; const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; int rv = -EIO; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", udev->devnum, le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice)); dev_dbg(dev, "Device at %p\n", udev); dev_dbg(dev, "Descriptor length: %x type: %x\n", (int)udev->descriptor.bLength, (int)udev->descriptor.bDescriptorType); netdev = alloc_etherdev(sizeof(*kaweth)); if (!netdev) return -ENOMEM; kaweth = netdev_priv(netdev); kaweth->dev = udev; kaweth->net = netdev; kaweth->intf = intf; spin_lock_init(&kaweth->device_lock); init_waitqueue_head(&kaweth->term_wait); dev_dbg(dev, "Resetting.\n"); kaweth_reset(kaweth); /* * If high byte of bcdDevice is nonzero, firmware is already * downloaded. Don't try to do it again, or we'll hang the device. */ if (le16_to_cpu(udev->descriptor.bcdDevice) >> 8) { dev_info(dev, "Firmware present in device.\n"); } else { /* Download the firmware */ dev_info(dev, "Downloading firmware...\n"); kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL); if (!kaweth->firmware_buf) { rv = -ENOMEM; goto err_free_netdev; } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code.bin", 100, 2)) < 0) { dev_err(dev, "Error downloading firmware (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code_fix.bin", 100, 3)) < 0) { dev_err(dev, "Error downloading firmware fix (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/trigger_code.bin", 126, 2)) < 0) { dev_err(dev, "Error downloading trigger code (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/trigger_code_fix.bin", 126, 3)) < 0) { dev_err(dev, "Error downloading trigger code fix (%d)\n", result); goto err_fw; } if ((result = kaweth_trigger_firmware(kaweth, 126)) < 0) { dev_err(dev, "Error triggering firmware (%d)\n", result); goto err_fw; } /* Device will now disappear for a moment... */ dev_info(dev, "Firmware loaded. I'll be back...\n"); err_fw: free_page((unsigned long)kaweth->firmware_buf); free_netdev(netdev); return -EIO; } result = kaweth_read_configuration(kaweth); if(result < 0) { dev_err(dev, "Error reading configuration (%d), no net device created\n", result); goto err_free_netdev; } dev_info(dev, "Statistics collection: %x\n", kaweth->configuration.statistics_mask); dev_info(dev, "Multicast filter limit: %x\n", kaweth->configuration.max_multicast_filters & ((1 << 15) - 1)); dev_info(dev, "MTU: %d\n", le16_to_cpu(kaweth->configuration.segment_size)); dev_info(dev, "Read MAC address %pM\n", kaweth->configuration.hw_addr); if(!memcmp(&kaweth->configuration.hw_addr, &bcast_addr, sizeof(bcast_addr))) { dev_err(dev, "Firmware not functioning properly, no net device created\n"); goto err_free_netdev; } if(kaweth_set_urb_size(kaweth, KAWETH_BUF_SIZE) < 0) { dev_dbg(dev, "Error setting URB size\n"); goto err_free_netdev; } if(kaweth_set_sofs_wait(kaweth, KAWETH_SOFS_TO_WAIT) < 0) { dev_err(dev, "Error setting SOFS wait\n"); goto err_free_netdev; } result = kaweth_set_receive_filter(kaweth, KAWETH_PACKET_FILTER_DIRECTED | KAWETH_PACKET_FILTER_BROADCAST | KAWETH_PACKET_FILTER_MULTICAST); if(result < 0) { dev_err(dev, "Error setting receive filter\n"); goto err_free_netdev; } dev_dbg(dev, "Initializing net device.\n"); kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->tx_urb) goto err_free_netdev; kaweth->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->rx_urb) goto err_only_tx; kaweth->irq_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->irq_urb) goto err_tx_and_rx; kaweth->intbuffer = usb_alloc_coherent( kaweth->dev, INTBUFFERSIZE, GFP_KERNEL, &kaweth->intbufferhandle); if (!kaweth->intbuffer) goto err_tx_and_rx_and_irq; kaweth->rx_buf = usb_alloc_coherent( kaweth->dev, KAWETH_BUF_SIZE, GFP_KERNEL, &kaweth->rxbufferhandle); if (!kaweth->rx_buf) goto err_all_but_rxbuf; memcpy(netdev->broadcast, &bcast_addr, sizeof(bcast_addr)); eth_hw_addr_set(netdev, (u8 *)&kaweth->configuration.hw_addr); netdev->netdev_ops = &kaweth_netdev_ops; netdev->watchdog_timeo = KAWETH_TX_TIMEOUT; netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size); netdev->ethtool_ops = &ops; /* kaweth is zeroed as part of alloc_netdev */ INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl); usb_set_intfdata(intf, kaweth); SET_NETDEV_DEV(netdev, dev); if (register_netdev(netdev) != 0) { dev_err(dev, "Error registering netdev.\n"); goto err_intfdata; } dev_info(dev, "kaweth interface created at %s\n", kaweth->net->name); return 0; err_intfdata: usb_set_intfdata(intf, NULL); usb_free_coherent(kaweth->dev, KAWETH_BUF_SIZE, (void *)kaweth->rx_buf, kaweth->rxbufferhandle); err_all_but_rxbuf: usb_free_coherent(kaweth->dev, INTBUFFERSIZE, (void *)kaweth->intbuffer, kaweth->intbufferhandle); err_tx_and_rx_and_irq: usb_free_urb(kaweth->irq_urb); err_tx_and_rx: usb_free_urb(kaweth->rx_urb); err_only_tx: usb_free_urb(kaweth->tx_urb); err_free_netdev: free_netdev(netdev); return rv; } /**************************************************************** * kaweth_disconnect ****************************************************************/ static void kaweth_disconnect(struct usb_interface *intf) { struct kaweth_device *kaweth = usb_get_intfdata(intf); struct net_device *netdev; usb_set_intfdata(intf, NULL); if (!kaweth) { dev_warn(&intf->dev, "unregistering non-existent device\n"); return; } netdev = kaweth->net; netdev_dbg(kaweth->net, "Unregistering net device\n"); unregister_netdev(netdev); usb_free_urb(kaweth->rx_urb); usb_free_urb(kaweth->tx_urb); usb_free_urb(kaweth->irq_urb); usb_free_coherent(kaweth->dev, KAWETH_BUF_SIZE, (void *)kaweth->rx_buf, kaweth->rxbufferhandle); usb_free_coherent(kaweth->dev, INTBUFFERSIZE, (void *)kaweth->intbuffer, kaweth->intbufferhandle); free_netdev(netdev); } module_usb_driver(kaweth_driver);
5 5 1 4 5 5 5 1 5 5 5 5 5 5 5 10 1 2 5 1 5 1458 1455 1457 65 9 1 1 4 3 10 1 9 9 7 1 6 6 2 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_cbs.c Credit Based Shaper * * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> */ /* Credit Based Shaper (CBS) * ========================= * * This is a simple rate-limiting shaper aimed at TSN applications on * systems with known traffic workloads. * * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, * Section 8.6.8.2, and explained in more detail in the Annex L of the * same specification. * * There are four tunables to be considered: * * 'idleslope': Idleslope is the rate of credits that is * accumulated (in kilobits per second) when there is at least * one packet waiting for transmission. Packets are transmitted * when the current value of credits is equal or greater than * zero. When there is no packet to be transmitted the amount of * credits is set to zero. This is the main tunable of the CBS * algorithm. * * 'sendslope': * Sendslope is the rate of credits that is depleted (it should be a * negative number of kilobits per second) when a transmission is * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section * 8.6.8.2 item g): * * sendslope = idleslope - port_transmit_rate * * 'hicredit': Hicredit defines the maximum amount of credits (in * bytes) that can be accumulated. Hicredit depends on the * characteristics of interfering traffic, * 'max_interference_size' is the maximum size of any burst of * traffic that can delay the transmission of a frame that is * available for transmission for this traffic class, (IEEE * 802.1Q-2014 Annex L, Equation L-3): * * hicredit = max_interference_size * (idleslope / port_transmit_rate) * * 'locredit': Locredit is the minimum amount of credits that can * be reached. It is a function of the traffic flowing through * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): * * locredit = max_frame_size * (sendslope / port_transmit_rate) */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/units.h> #include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> static LIST_HEAD(cbs_list); static DEFINE_SPINLOCK(cbs_list_lock); struct cbs_sched_data { bool offload; int queue; atomic64_t port_rate; /* in bytes/s */ s64 last; /* timestamp in ns */ s64 credits; /* in bytes */ s32 locredit; /* in bytes */ s32 hicredit; /* in bytes */ s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; struct list_head cbs_list; }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); int err; err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; return cbs_child_enqueue(skb, sch, qdisc, to_free); } static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; if (sch->q.qlen == 0 && q->credits > 0) { /* We need to stop accumulating credits when there's * no enqueued packets and q->credits is positive. */ q->credits = 0; q->last = ktime_get_ns(); } return cbs_child_enqueue(skb, sch, qdisc, to_free); } static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); return q->enqueue(skb, sch, to_free); } /* timediff is in ns, slope is in bytes/s */ static s64 timediff_to_credits(s64 timediff, s64 slope) { return div64_s64(timediff * slope, NSEC_PER_SEC); } static s64 delay_from_credits(s64 credits, s64 slope) { if (unlikely(slope == 0)) return S64_MAX; return div64_s64(-credits * NSEC_PER_SEC, slope); } static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) { if (unlikely(port_rate == 0)) return S64_MAX; return div64_s64(len * slope, port_rate); } static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child) { struct sk_buff *skb; skb = child->ops->dequeue(child); if (!skb) return NULL; qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; s64 now = ktime_get_ns(); struct sk_buff *skb; s64 credits; int len; /* The previous packet is still being sent */ if (now < q->last) { qdisc_watchdog_schedule_ns(&q->watchdog, q->last); return NULL; } if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); credits = q->credits + credits; q->credits = min_t(s64, credits, q->hicredit); if (q->credits < 0) { s64 delay; delay = delay_from_credits(q->credits, q->idleslope); qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); q->last = now; return NULL; } } skb = cbs_child_dequeue(sch, qdisc); if (!skb) return NULL; len = qdisc_pkt_len(skb); /* As sendslope is a negative number, this will decrease the * amount of q->credits. */ credits = credits_from_len(len, q->sendslope, atomic64_read(&q->port_rate)); credits += q->credits; q->credits = max_t(s64, credits, q->locredit); /* Estimate of the transmission of the last byte of the packet in ns */ if (unlikely(atomic64_read(&q->port_rate) == 0)) q->last = now; else q->last = now + div64_s64(len * NSEC_PER_SEC, atomic64_read(&q->port_rate)); return skb; } static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; return cbs_child_dequeue(sch, qdisc); } static struct sk_buff *cbs_dequeue(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); return q->dequeue(sch); } static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, }; static void cbs_disable_offload(struct net_device *dev, struct cbs_sched_data *q) { struct tc_cbs_qopt_offload cbs = { }; const struct net_device_ops *ops; int err; if (!q->offload) return; q->enqueue = cbs_enqueue_soft; q->dequeue = cbs_dequeue_soft; ops = dev->netdev_ops; if (!ops->ndo_setup_tc) return; cbs.queue = q->queue; cbs.enable = 0; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) pr_warn("Couldn't disable CBS offload for queue %d\n", cbs.queue); } static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, const struct tc_cbs_qopt *opt, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_cbs_qopt_offload cbs = { }; int err; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload"); return -EOPNOTSUPP; } cbs.queue = q->queue; cbs.enable = 1; cbs.hicredit = opt->hicredit; cbs.locredit = opt->locredit; cbs.idleslope = opt->idleslope; cbs.sendslope = opt->sendslope; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) { NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload"); return err; } q->enqueue = cbs_enqueue_offload; q->dequeue = cbs_dequeue_offload; return 0; } static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; s64 port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); if (err < 0) goto skip; if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN) speed = ecmd.base.speed; skip: port_rate = speed * 1000 * BYTES_PER_KBIT; atomic64_set(&q->port_rate, port_rate); netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", dev->name, (long long)atomic64_read(&q->port_rate), ecmd.base.speed); } static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct cbs_sched_data *q; struct net_device *qdev; bool found = false; ASSERT_RTNL(); if (event != NETDEV_UP && event != NETDEV_CHANGE) return NOTIFY_DONE; spin_lock(&cbs_list_lock); list_for_each_entry(q, &cbs_list, cbs_list) { qdev = qdisc_dev(q->qdisc); if (qdev == dev) { found = true; break; } } spin_unlock(&cbs_list_lock); if (found) cbs_set_port_rate(dev, q); return NOTIFY_DONE; } static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct nlattr *tb[TCA_CBS_MAX + 1]; struct tc_cbs_qopt *qopt; int err; err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy, extack); if (err < 0) return err; if (!tb[TCA_CBS_PARMS]) { NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory"); return -EINVAL; } qopt = nla_data(tb[TCA_CBS_PARMS]); if (!qopt->offload) { cbs_set_port_rate(dev, q); cbs_disable_offload(dev, q); } else { err = cbs_enable_offload(dev, q, qopt, extack); if (err < 0) return err; } /* Everything went OK, save the parameters used. */ WRITE_ONCE(q->hicredit, qopt->hicredit); WRITE_ONCE(q->locredit, qopt->locredit); WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT); WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT); WRITE_ONCE(q->offload, qopt->offload); return 0; } static int cbs_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); if (!opt) { NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); return -EINVAL; } q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); if (!q->qdisc) return -ENOMEM; spin_lock(&cbs_list_lock); list_add(&q->cbs_list, &cbs_list); spin_unlock(&cbs_list_lock); qdisc_hash_add(q->qdisc, false); q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); q->enqueue = cbs_enqueue_soft; q->dequeue = cbs_dequeue_soft; qdisc_watchdog_init(&q->watchdog, sch); return cbs_change(sch, opt, extack); } static void cbs_destroy(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); /* Nothing to do if we couldn't create the underlying qdisc */ if (!q->qdisc) return; qdisc_watchdog_cancel(&q->watchdog); cbs_disable_offload(dev, q); spin_lock(&cbs_list_lock); list_del(&q->cbs_list); spin_unlock(&cbs_list_lock); qdisc_put(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) { struct cbs_sched_data *q = qdisc_priv(sch); struct tc_cbs_qopt opt = { }; struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; opt.hicredit = READ_ONCE(q->hicredit); opt.locredit = READ_ONCE(q->locredit); opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT); opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT); opt.offload = READ_ONCE(q->offload); if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static int cbs_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct cbs_sched_data *q = qdisc_priv(sch); if (cl != 1 || !q->qdisc) /* only one class */ return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); if (!new) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, NULL); if (!new) new = &noop_qdisc; } *old = qdisc_replace(sch, new, &q->qdisc); return 0; } static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg) { struct cbs_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long cbs_find(struct Qdisc *sch, u32 classid) { return 1; } static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { tc_qdisc_stats_dump(sch, 1, walker); } } static const struct Qdisc_class_ops cbs_class_ops = { .graft = cbs_graft, .leaf = cbs_leaf, .find = cbs_find, .walk = cbs_walk, .dump = cbs_dump_class, }; static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .id = "cbs", .cl_ops = &cbs_class_ops, .priv_size = sizeof(struct cbs_sched_data), .enqueue = cbs_enqueue, .dequeue = cbs_dequeue, .peek = qdisc_peek_dequeued, .init = cbs_init, .reset = qdisc_reset_queue, .destroy = cbs_destroy, .change = cbs_change, .dump = cbs_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("cbs"); static struct notifier_block cbs_device_notifier = { .notifier_call = cbs_dev_notifier, }; static int __init cbs_module_init(void) { int err; err = register_netdevice_notifier(&cbs_device_notifier); if (err) return err; err = register_qdisc(&cbs_qdisc_ops); if (err) unregister_netdevice_notifier(&cbs_device_notifier); return err; } static void __exit cbs_module_exit(void) { unregister_qdisc(&cbs_qdisc_ops); unregister_netdevice_notifier(&cbs_device_notifier); } module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Credit Based shaper");
354 354 39 39 354 39 354 354 354 354 354 38 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 // SPDX-License-Identifier: GPL-2.0 /* * driver.c - centralized device driver management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc. */ #include <linux/device/driver.h> #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/sysfs.h> #include "base.h" static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_driver(n); dev = dev_prv->device; } return dev; } /** * driver_set_override() - Helper to set or clear driver override. * @dev: Device to change * @override: Address of string to change (e.g. &device->driver_override); * The contents will be freed and hold newly allocated override. * @s: NUL-terminated string, new driver name to force a match, pass empty * string to clear it ("" or "\n", where the latter is only for sysfs * interface). * @len: length of @s * * Helper to set or clear driver override in a device, intended for the cases * when the driver_override field is allocated by driver/bus code. * * Returns: 0 on success or a negative error code on failure. */ int driver_set_override(struct device *dev, const char **override, const char *s, size_t len) { const char *new, *old; char *cp; if (!override || !s) return -EINVAL; /* * The stored value will be used in sysfs show callback (sysfs_emit()), * which has a length limit of PAGE_SIZE and adds a trailing newline. * Thus we can store one character less to avoid truncation during sysfs * show. */ if (len >= (PAGE_SIZE - 1)) return -EINVAL; /* * Compute the real length of the string in case userspace sends us a * bunch of \0 characters like python likes to do. */ len = strlen(s); if (!len) { /* Empty string passed - clear override */ device_lock(dev); old = *override; *override = NULL; device_unlock(dev); kfree(old); return 0; } cp = strnchr(s, len, '\n'); if (cp) len = cp - s; new = kstrndup(s, len, GFP_KERNEL); if (!new) return -ENOMEM; device_lock(dev); old = *override; if (cp != s) { *override = new; } else { /* "\n" passed - clear override */ kfree(new); *override = NULL; } device_unlock(dev); kfree(old); return 0; } EXPORT_SYMBOL_GPL(driver_set_override); /** * driver_for_each_device - Iterator for devices bound to a driver. * @drv: Driver we're iterating. * @start: Device to begin with * @data: Data to pass to the callback. * @fn: Function to call for each device. * * Iterate over the @drv's list of devices calling @fn for each one. */ int driver_for_each_device(struct device_driver *drv, struct device *start, void *data, int (*fn)(struct device *, void *)) { struct klist_iter i; struct device *dev; int error = 0; if (!drv) return -EINVAL; klist_iter_init_node(&drv->p->klist_devices, &i, start ? &start->p->knode_driver : NULL); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(driver_for_each_device); /** * driver_find_device - device iterator for locating a particular device. * @drv: The device's driver * @start: Device to begin with * @data: Data to pass to match function * @match: Callback function to check device * * This is similar to the driver_for_each_device() function above, but * it returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, device_match_t match) { struct klist_iter i; struct device *dev; if (!drv || !drv->p) return NULL; klist_iter_init_node(&drv->p->klist_devices, &i, (start ? &start->p->knode_driver : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; klist_iter_exit(&i); return dev; } EXPORT_SYMBOL_GPL(driver_find_device); /** * driver_create_file - create sysfs file for driver. * @drv: driver. * @attr: driver attribute descriptor. */ int driver_create_file(const struct device_driver *drv, const struct driver_attribute *attr) { int error; if (drv) error = sysfs_create_file(&drv->p->kobj, &attr->attr); else error = -EINVAL; return error; } EXPORT_SYMBOL_GPL(driver_create_file); /** * driver_remove_file - remove sysfs file for driver. * @drv: driver. * @attr: driver attribute descriptor. */ void driver_remove_file(const struct device_driver *drv, const struct driver_attribute *attr) { if (drv) sysfs_remove_file(&drv->p->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(driver_remove_file); int driver_add_groups(const struct device_driver *drv, const struct attribute_group **groups) { return sysfs_create_groups(&drv->p->kobj, groups); } void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups) { sysfs_remove_groups(&drv->p->kobj, groups); } /** * driver_register - register driver with bus * @drv: driver to register * * We pass off most of the work to the bus_add_driver() call, * since most of the things we have to do deal with the bus * structures. */ int driver_register(struct device_driver *drv) { int ret; struct device_driver *other; if (!bus_is_registered(drv->bus)) { pr_err("Driver '%s' was unable to register with bus_type '%s' because the bus was not initialized.\n", drv->name, drv->bus->name); return -EINVAL; } if ((drv->bus->probe && drv->probe) || (drv->bus->remove && drv->remove) || (drv->bus->shutdown && drv->shutdown)) pr_warn("Driver '%s' needs updating - please use " "bus_type methods\n", drv->name); other = driver_find(drv->name, drv->bus); if (other) { pr_err("Error: Driver '%s' is already registered, " "aborting...\n", drv->name); return -EBUSY; } ret = bus_add_driver(drv); if (ret) return ret; ret = driver_add_groups(drv, drv->groups); if (ret) { bus_remove_driver(drv); return ret; } kobject_uevent(&drv->p->kobj, KOBJ_ADD); deferred_probe_extend_timeout(); return ret; } EXPORT_SYMBOL_GPL(driver_register); /** * driver_unregister - remove driver from system. * @drv: driver. * * Again, we pass off most of the work to the bus-level call. */ void driver_unregister(struct device_driver *drv) { if (!drv || !drv->p) { WARN(1, "Unexpected driver unregister!\n"); return; } driver_remove_groups(drv, drv->groups); bus_remove_driver(drv); } EXPORT_SYMBOL_GPL(driver_unregister);
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 /* SPDX-License-Identifier: GPL-2.0 */ /* * This file define the new driver API for Wireless Extensions * * Version : 8 16.3.07 * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 2001-2007 Jean Tourrilhes, All Rights Reserved. */ #ifndef _IW_HANDLER_H #define _IW_HANDLER_H /************************** DOCUMENTATION **************************/ /* * Initial driver API (1996 -> onward) : * ----------------------------------- * The initial API just sends the IOCTL request received from user space * to the driver (via the driver ioctl handler). The driver has to * handle all the rest... * * The initial API also defines a specific handler in struct net_device * to handle wireless statistics. * * The initial APIs served us well and has proven a reasonably good design. * However, there are a few shortcomings : * o No events, everything is a request to the driver. * o Large ioctl function in driver with gigantic switch statement * (i.e. spaghetti code). * o Driver has to mess up with copy_to/from_user, and in many cases * does it unproperly. Common mistakes are : * * buffer overflows (no checks or off by one checks) * * call copy_to/from_user with irq disabled * o The user space interface is tied to ioctl because of the use * copy_to/from_user. * * New driver API (2002 -> onward) : * ------------------------------- * The new driver API is just a bunch of standard functions (handlers), * each handling a specific Wireless Extension. The driver just export * the list of handler it supports, and those will be called appropriately. * * I tried to keep the main advantage of the previous API (simplicity, * efficiency and light weight), and also I provide a good dose of backward * compatibility (most structures are the same, driver can use both API * simultaneously, ...). * Hopefully, I've also addressed the shortcoming of the initial API. * * The advantage of the new API are : * o Handling of Extensions in driver broken in small contained functions * o Tighter checks of ioctl before calling the driver * o Flexible commit strategy (at least, the start of it) * o Backward compatibility (can be mixed with old API) * o Driver doesn't have to worry about memory and user-space issues * The last point is important for the following reasons : * o You are now able to call the new driver API from any API you * want (including from within other parts of the kernel). * o Common mistakes are avoided (buffer overflow, user space copy * with irq disabled and so on). * * The Drawback of the new API are : * o bloat (especially kernel) * o need to migrate existing drivers to new API * My initial testing shows that the new API adds around 3kB to the kernel * and save between 0 and 5kB from a typical driver. * Also, as all structures and data types are unchanged, the migration is * quite straightforward (but tedious). * * --- * * The new driver API is defined below in this file. User space should * not be aware of what's happening down there... * * A new kernel wrapper is in charge of validating the IOCTLs and calling * the appropriate driver handler. This is implemented in : * # net/core/wireless.c * * The driver export the list of handlers in : * # include/linux/netdevice.h (one place) * * The new driver API is available for WIRELESS_EXT >= 13. * Good luck with migration to the new API ;-) */ /* ---------------------- THE IMPLEMENTATION ---------------------- */ /* * Some of the choice I've made are pretty controversial. Defining an * API is very much weighting compromises. This goes into some of the * details and the thinking behind the implementation. * * Implementation goals : * -------------------- * The implementation goals were as follow : * o Obvious : you should not need a PhD to understand what's happening, * the benefit is easier maintenance. * o Flexible : it should accommodate a wide variety of driver * implementations and be as flexible as the old API. * o Lean : it should be efficient memory wise to minimise the impact * on kernel footprint. * o Transparent to user space : the large number of user space * applications that use Wireless Extensions should not need * any modifications. * * Array of functions versus Struct of functions * --------------------------------------------- * 1) Having an array of functions allow the kernel code to access the * handler in a single lookup, which is much more efficient (think hash * table here). * 2) The only drawback is that driver writer may put their handler in * the wrong slot. This is trivial to test (I set the frequency, the * bitrate changes). Once the handler is in the proper slot, it will be * there forever, because the array is only extended at the end. * 3) Backward/forward compatibility : adding new handler just require * extending the array, so you can put newer driver in older kernel * without having to patch the kernel code (and vice versa). * * All handler are of the same generic type * ---------------------------------------- * That's a feature !!! * 1) Having a generic handler allow to have generic code, which is more * efficient. If each of the handler was individually typed I would need * to add a big switch in the kernel (== more bloat). This solution is * more scalable, adding new Wireless Extensions doesn't add new code. * 2) You can use the same handler in different slots of the array. For * hardware, it may be more efficient or logical to handle multiple * Wireless Extensions with a single function, and the API allow you to * do that. (An example would be a single record on the card to control * both bitrate and frequency, the handler would read the old record, * modify it according to info->cmd and rewrite it). * * Functions prototype uses union iwreq_data * ----------------------------------------- * Some would have preferred functions defined this way : * static int mydriver_ioctl_setrate(struct net_device *dev, * long rate, int auto) * 1) The kernel code doesn't "validate" the content of iwreq_data, and * can't do it (different hardware may have different notion of what a * valid frequency is), so we don't pretend that we do it. * 2) The above form is not extendable. If I want to add a flag (for * example to distinguish setting max rate and basic rate), I would * break the prototype. Using iwreq_data is more flexible. * 3) Also, the above form is not generic (see above). * 4) I don't expect driver developer using the wrong field of the * union (Doh !), so static typechecking doesn't add much value. * 5) Lastly, you can skip the union by doing : * static int mydriver_ioctl_setrate(struct net_device *dev, * struct iw_request_info *info, * struct iw_param *rrq, * char *extra) * And then adding the handler in the array like this : * (iw_handler) mydriver_ioctl_setrate, // SIOCSIWRATE * * Using functions and not a registry * ---------------------------------- * Another implementation option would have been for every instance to * define a registry (a struct containing all the Wireless Extensions) * and only have a function to commit the registry to the hardware. * 1) This approach can be emulated by the current code, but not * vice versa. * 2) Some drivers don't keep any configuration in the driver, for them * adding such a registry would be a significant bloat. * 3) The code to translate from Wireless Extension to native format is * needed anyway, so it would not reduce significantely the amount of code. * 4) The current approach only selectively translate Wireless Extensions * to native format and only selectively set, whereas the registry approach * would require to translate all WE and set all parameters for any single * change. * 5) For many Wireless Extensions, the GET operation return the current * dynamic value, not the value that was set. * * This header is <net/iw_handler.h> * --------------------------------- * 1) This header is kernel space only and should not be exported to * user space. Headers in "include/linux/" are exported, headers in * "include/net/" are not. * * Mixed 32/64 bit issues * ---------------------- * The Wireless Extensions are designed to be 64 bit clean, by using only * datatypes with explicit storage size. * There are some issues related to kernel and user space using different * memory model, and in particular 64bit kernel with 32bit user space. * The problem is related to struct iw_point, that contains a pointer * that *may* need to be translated. * This is quite messy. The new API doesn't solve this problem (it can't), * but is a step in the right direction : * 1) Meta data about each ioctl is easily available, so we know what type * of translation is needed. * 2) The move of data between kernel and user space is only done in a single * place in the kernel, so adding specific hooks in there is possible. * 3) In the long term, it allows to move away from using ioctl as the * user space API. * * So many comments and so few code * -------------------------------- * That's a feature. Comments won't bloat the resulting kernel binary. */ /***************************** INCLUDES *****************************/ #include <linux/wireless.h> /* IOCTL user space API */ #include <linux/if_ether.h> /***************************** VERSION *****************************/ /* * This constant is used to know which version of the driver API is * available. Hopefully, this will be pretty stable and no changes * will be needed... * I just plan to increment with each new version. */ #define IW_HANDLER_VERSION 8 /* * Changes : * * V2 to V3 * -------- * - Move event definition in <linux/wireless.h> * - Add Wireless Event support : * o wireless_send_event() prototype * o iwe_stream_add_event/point() inline functions * V3 to V4 * -------- * - Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes * * V4 to V5 * -------- * - Add new spy support : struct iw_spy_data & prototypes * * V5 to V6 * -------- * - Change the way we get to spy_data method for added safety * - Remove spy #ifdef, they are always on -> cleaner code * - Add IW_DESCR_FLAG_NOMAX flag for very large requests * - Start migrating get_wireless_stats to struct iw_handler_def * * V6 to V7 * -------- * - Add struct ieee80211_device pointer in struct iw_public_data * - Remove (struct iw_point *)->pointer from events and streams * - Remove spy_offset from struct iw_handler_def * - Add "check" version of event macros for ieee802.11 stack * * V7 to V8 * ---------- * - Prevent leaking of kernel space in stream on 64 bits. */ /**************************** CONSTANTS ****************************/ /* Enhanced spy support available */ #define IW_WIRELESS_SPY #define IW_WIRELESS_THRSPY /* Special error message for the driver to indicate that we * should do a commit after return from the iw_handler */ #define EIWCOMMIT EINPROGRESS /* Flags available in struct iw_request_info */ #define IW_REQUEST_FLAG_COMPAT 0x0001 /* Compat ioctl call */ /* Type of headers we know about (basically union iwreq_data) */ #define IW_HEADER_TYPE_NULL 0 /* Not available */ #define IW_HEADER_TYPE_CHAR 2 /* char [IFNAMSIZ] */ #define IW_HEADER_TYPE_UINT 4 /* __u32 */ #define IW_HEADER_TYPE_FREQ 5 /* struct iw_freq */ #define IW_HEADER_TYPE_ADDR 6 /* struct sockaddr */ #define IW_HEADER_TYPE_POINT 8 /* struct iw_point */ #define IW_HEADER_TYPE_PARAM 9 /* struct iw_param */ #define IW_HEADER_TYPE_QUAL 10 /* struct iw_quality */ /* Handling flags */ /* Most are not implemented. I just use them as a reminder of some * cool features we might need one day ;-) */ #define IW_DESCR_FLAG_NONE 0x0000 /* Obvious */ /* Wrapper level flags */ #define IW_DESCR_FLAG_DUMP 0x0001 /* Not part of the dump command */ #define IW_DESCR_FLAG_EVENT 0x0002 /* Generate an event on SET */ #define IW_DESCR_FLAG_RESTRICT 0x0004 /* GET : request is ROOT only */ /* SET : Omit payload from generated iwevent */ #define IW_DESCR_FLAG_NOMAX 0x0008 /* GET : no limit on request size */ /****************************** TYPES ******************************/ /* ----------------------- WIRELESS HANDLER ----------------------- */ /* * A wireless handler is just a standard function, that looks like the * ioctl handler. * We also define there how a handler list look like... As the Wireless * Extension space is quite dense, we use a simple array, which is faster * (that's the perfect hash table ;-). */ /* * Meta data about the request passed to the iw_handler. * Most handlers can safely ignore what's in there. * The 'cmd' field might come handy if you want to use the same handler * for multiple command... * This struct is also my long term insurance. I can add new fields here * without breaking the prototype of iw_handler... */ struct iw_request_info { __u16 cmd; /* Wireless Extension command */ __u16 flags; /* More to come ;-) */ }; struct net_device; /* * This is how a function handling a Wireless Extension should look * like (both get and set, standard and private). */ typedef int (*iw_handler)(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra); /* * This define all the handler that the driver export. * As you need only one per driver type, please use a static const * shared by all driver instances... Same for the members... * This will be linked from net_device in <linux/netdevice.h> */ struct iw_handler_def { /* Array of handlers for standard ioctls * We will call dev->wireless_handlers->standard[ioctl - SIOCIWFIRST] */ const iw_handler * standard; /* Number of handlers defined (more precisely, index of the * last defined handler + 1) */ __u16 num_standard; #ifdef CONFIG_WEXT_PRIV __u16 num_private; /* Number of private arg description */ __u16 num_private_args; /* Array of handlers for private ioctls * Will call dev->wireless_handlers->private[ioctl - SIOCIWFIRSTPRIV] */ const iw_handler * private; /* Arguments of private handler. This one is just a list, so you * can put it in any order you want and should not leave holes... * We will automatically export that to user space... */ const struct iw_priv_args * private_args; #endif /* New location of get_wireless_stats, to de-bloat struct net_device. * The old pointer in struct net_device will be gradually phased * out, and drivers are encouraged to use this one... */ struct iw_statistics* (*get_wireless_stats)(struct net_device *dev); }; /* ---------------------- IOCTL DESCRIPTION ---------------------- */ /* * One of the main goal of the new interface is to deal entirely with * user space/kernel space memory move. * For that, we need to know : * o if iwreq is a pointer or contain the full data * o what is the size of the data to copy * * For private IOCTLs, we use the same rules as used by iwpriv and * defined in struct iw_priv_args. * * For standard IOCTLs, things are quite different and we need to * use the structures below. Actually, this struct is also more * efficient, but that's another story... */ /* * Describe how a standard IOCTL looks like. */ struct iw_ioctl_description { __u8 header_type; /* NULL, iw_point or other */ __u8 flags; /* Special handling of the request */ __u16 token_size; /* Granularity of payload */ __u16 min_tokens; /* Min acceptable token number */ __u16 max_tokens; /* Max acceptable token number */ }; /* Need to think of short header translation table. Later. */ /* --------------------- ENHANCED SPY SUPPORT --------------------- */ /* * In the old days, the driver was handling spy support all by itself. * Now, the driver can delegate this task to Wireless Extensions. * It needs to include this struct in its private part and use the * standard spy iw_handler. */ /* * Instance specific spy data, i.e. addresses spied and quality for them. */ struct iw_spy_data { /* --- Standard spy support --- */ int spy_number; u_char spy_address[IW_MAX_SPY][ETH_ALEN]; struct iw_quality spy_stat[IW_MAX_SPY]; /* --- Enhanced spy support (event) */ struct iw_quality spy_thr_low; /* Low threshold */ struct iw_quality spy_thr_high; /* High threshold */ u_char spy_thr_under[IW_MAX_SPY]; }; /**************************** PROTOTYPES ****************************/ /* * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c). * Those may be called by driver modules. */ /* Send a single event to user space */ void wireless_send_event(struct net_device *dev, unsigned int cmd, union iwreq_data *wrqu, const char *extra); #ifdef CONFIG_WEXT_CORE /* flush all previous wext events - if work is done from netdev notifiers */ void wireless_nlevent_flush(void); #else static inline void wireless_nlevent_flush(void) {} #endif /* We may need a function to send a stream of events to user space. * More on that later... */ /************************* INLINE FUNCTIONS *************************/ /* * Function that are so simple that it's more efficient inlining them */ static inline int iwe_stream_lcp_len(struct iw_request_info *info) { #ifdef CONFIG_COMPAT if (info->flags & IW_REQUEST_FLAG_COMPAT) return IW_EV_COMPAT_LCP_LEN; #endif return IW_EV_LCP_LEN; } static inline int iwe_stream_point_len(struct iw_request_info *info) { #ifdef CONFIG_COMPAT if (info->flags & IW_REQUEST_FLAG_COMPAT) return IW_EV_COMPAT_POINT_LEN; #endif return IW_EV_POINT_LEN; } static inline int iwe_stream_event_len_adjust(struct iw_request_info *info, int event_len) { #ifdef CONFIG_COMPAT if (info->flags & IW_REQUEST_FLAG_COMPAT) { event_len -= IW_EV_LCP_LEN; event_len += IW_EV_COMPAT_LCP_LEN; } #endif return event_len; } /*------------------------------------------------------------------*/ /* * Wrapper to add an Wireless Event to a stream of events. */ char *iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, int event_len); static inline char * iwe_stream_add_event_check(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, int event_len) { char *res = iwe_stream_add_event(info, stream, ends, iwe, event_len); if (res == stream) return ERR_PTR(-E2BIG); return res; } /*------------------------------------------------------------------*/ /* * Wrapper to add an short Wireless Event containing a pointer to a * stream of events. */ char *iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, char *extra); static inline char * iwe_stream_add_point_check(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, char *extra) { char *res = iwe_stream_add_point(info, stream, ends, iwe, extra); if (res == stream) return ERR_PTR(-E2BIG); return res; } /*------------------------------------------------------------------*/ /* * Wrapper to add a value to a Wireless Event in a stream of events. * Be careful, this one is tricky to use properly : * At the first run, you need to have (value = event + IW_EV_LCP_LEN). */ char *iwe_stream_add_value(struct iw_request_info *info, char *event, char *value, char *ends, struct iw_event *iwe, int event_len); #endif /* _IW_HANDLER_H */
7 7 7 7 11 11 7 9 1 6 8 8 25 1 2 7 9 2 4 15 6 13 2 19 6 1 7 4 2 3 8 3 8 7 4 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 // SPDX-License-Identifier: GPL-2.0 /* * Quota code necessary even when VFS quota support is not compiled * into the kernel. The interesting stuff is over in dquot.c, here * we have symbols for initial quotactl(2) handling, the sysctl(2) * variables, etc - things needed even when quota support disabled. */ #include <linux/fs.h> #include <linux/namei.h> #include <linux/slab.h> #include <asm/current.h> #include <linux/blkdev.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/types.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/nospec.h> #include "compat.h" #include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) { switch (cmd) { /* these commands do not require any special privilegues */ case Q_GETFMT: case Q_SYNC: case Q_GETINFO: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XQUOTASYNC: break; /* allow to query information for dquots we "own" */ case Q_GETQUOTA: case Q_XGETQUOTA: if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) || (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id)))) break; fallthrough; default: if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return security_quotactl(cmd, type, id, sb); } static void quota_sync_one(struct super_block *sb, void *arg) { int type = *(int *)arg; if (sb->s_qcop && sb->s_qcop->quota_sync && (sb->s_quota_types & (1 << type))) sb->s_qcop->quota_sync(sb, type); } static int quota_sync_all(int type) { int ret; ret = security_quotactl(Q_SYNC, type, 0, NULL); if (!ret) iterate_supers(quota_sync_one, &type); return ret; } unsigned int qtype_enforce_flag(int type) { switch (type) { case USRQUOTA: return FS_QUOTA_UDQ_ENFD; case GRPQUOTA: return FS_QUOTA_GDQ_ENFD; case PRJQUOTA: return FS_QUOTA_PDQ_ENFD; } return 0; } static int quota_quotaon(struct super_block *sb, int type, qid_t id, const struct path *path) { if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) return -ENOSYS; if (sb->s_qcop->quota_enable) return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type)); if (IS_ERR(path)) return PTR_ERR(path); return sb->s_qcop->quota_on(sb, type, id, path); } static int quota_quotaoff(struct super_block *sb, int type) { if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable) return -ENOSYS; if (sb->s_qcop->quota_disable) return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type)); return sb->s_qcop->quota_off(sb, type); } static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; if (!sb_has_quota_active(sb, type)) return -ESRCH; fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; } static int quota_getinfo(struct super_block *sb, int type, void __user *addr) { struct qc_state state; struct qc_type_state *tstate; struct if_dqinfo uinfo; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = sb->s_qcop->get_state(sb, &state); if (ret) return ret; tstate = state.s_state + type; if (!(tstate->flags & QCI_ACCT_ENABLED)) return -ESRCH; memset(&uinfo, 0, sizeof(uinfo)); uinfo.dqi_bgrace = tstate->spc_timelimit; uinfo.dqi_igrace = tstate->ino_timelimit; if (tstate->flags & QCI_SYSFILE) uinfo.dqi_flags |= DQF_SYS_FILE; if (tstate->flags & QCI_ROOT_SQUASH) uinfo.dqi_flags |= DQF_ROOT_SQUASH; uinfo.dqi_valid = IIF_ALL; if (copy_to_user(addr, &uinfo, sizeof(uinfo))) return -EFAULT; return 0; } static int quota_setinfo(struct super_block *sb, int type, void __user *addr) { struct if_dqinfo info; struct qc_info qinfo; if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; if (!sb->s_qcop->set_info) return -ENOSYS; if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE)) return -EINVAL; memset(&qinfo, 0, sizeof(qinfo)); if (info.dqi_valid & IIF_FLAGS) { if (info.dqi_flags & ~DQF_SETINFO_MASK) return -EINVAL; if (info.dqi_flags & DQF_ROOT_SQUASH) qinfo.i_flags |= QCI_ROOT_SQUASH; qinfo.i_fieldmask |= QC_FLAGS; } if (info.dqi_valid & IIF_BGRACE) { qinfo.i_spc_timelimit = info.dqi_bgrace; qinfo.i_fieldmask |= QC_SPC_TIMER; } if (info.dqi_valid & IIF_IGRACE) { qinfo.i_ino_timelimit = info.dqi_igrace; qinfo.i_fieldmask |= QC_INO_TIMER; } return sb->s_qcop->set_info(sb, type, &qinfo); } static inline qsize_t qbtos(qsize_t blocks) { return blocks << QIF_DQBLKSIZE_BITS; } static inline qsize_t stoqb(qsize_t space) { return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; } static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src) { memset(dst, 0, sizeof(*dst)); dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit); dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit); dst->dqb_curspace = src->d_space; dst->dqb_ihardlimit = src->d_ino_hardlimit; dst->dqb_isoftlimit = src->d_ino_softlimit; dst->dqb_curinodes = src->d_ino_count; dst->dqb_btime = src->d_spc_timer; dst->dqb_itime = src->d_ino_timer; dst->dqb_valid = QIF_ALL; } static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_dqblk idq; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); if (ret) return ret; copy_to_if_dqblk(&idq, &fdq); if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_to_user(compat_dqblk, &idq, sizeof(*compat_dqblk))) return -EFAULT; if (put_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; } return 0; } /* * Return quota for next active quota >= this id, if any exists, * otherwise return -ENOENT via ->get_nextdqblk */ static int quota_getnextquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_nextdqblk idq; int ret; if (!sb->s_qcop->get_nextdqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq); if (ret) return ret; /* struct if_nextdqblk is a superset of struct if_dqblk */ copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq); idq.dqb_id = from_kqid(current_user_ns(), qid); if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; return 0; } static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit); dst->d_space = src->dqb_curspace; dst->d_ino_hardlimit = src->dqb_ihardlimit; dst->d_ino_softlimit = src->dqb_isoftlimit; dst->d_ino_count = src->dqb_curinodes; dst->d_spc_timer = src->dqb_btime; dst->d_ino_timer = src->dqb_itime; dst->d_fieldmask = 0; if (src->dqb_valid & QIF_BLIMITS) dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD; if (src->dqb_valid & QIF_SPACE) dst->d_fieldmask |= QC_SPACE; if (src->dqb_valid & QIF_ILIMITS) dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD; if (src->dqb_valid & QIF_INODES) dst->d_fieldmask |= QC_INO_COUNT; if (src->dqb_valid & QIF_BTIME) dst->d_fieldmask |= QC_SPC_TIMER; if (src->dqb_valid & QIF_ITIME) dst->d_fieldmask |= QC_INO_TIMER; } static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct qc_dqblk fdq; struct if_dqblk idq; struct kqid qid; if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_from_user(&idq, compat_dqblk, sizeof(*compat_dqblk)) || get_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; } if (!sb->s_qcop->set_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; copy_from_if_dqblk(&fdq, &idq); return sb->s_qcop->set_dqblk(sb, qid, &fdq); } static int quota_enable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_enable) return -ENOSYS; return sb->s_qcop->quota_enable(sb, flags); } static int quota_disable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_disable) return -ENOSYS; return sb->s_qcop->quota_disable(sb, flags); } static int quota_state_to_flags(struct qc_state *state) { int flags = 0; if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_UDQ_ACCT; if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_UDQ_ENFD; if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_GDQ_ACCT; if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_GDQ_ENFD; if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_PDQ_ACCT; if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_PDQ_ENFD; return flags; } static int quota_getstate(struct super_block *sb, int type, struct fs_quota_stat *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? */ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { /* * Q_XGETQSTAT doesn't have room for both group and project * quotas. So, allow the project quota values to be copied out * only if there is no group quota information available. */ if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) { fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } } return 0; } static int compat_copy_fs_qfilestat(struct compat_fs_qfilestat __user *to, struct fs_qfilestat *from) { if (copy_to_user(to, from, sizeof(*to)) || put_user(from->qfs_nextents, &to->qfs_nextents)) return -EFAULT; return 0; } static int compat_copy_fs_quota_stat(struct compat_fs_quota_stat __user *to, struct fs_quota_stat *from) { if (put_user(from->qs_version, &to->qs_version) || put_user(from->qs_flags, &to->qs_flags) || put_user(from->qs_pad, &to->qs_pad) || compat_copy_fs_qfilestat(&to->qs_uquota, &from->qs_uquota) || compat_copy_fs_qfilestat(&to->qs_gquota, &from->qs_gquota) || put_user(from->qs_incoredqs, &to->qs_incoredqs) || put_user(from->qs_btimelimit, &to->qs_btimelimit) || put_user(from->qs_itimelimit, &to->qs_itimelimit) || put_user(from->qs_rtbtimelimit, &to->qs_rtbtimelimit) || put_user(from->qs_bwarnlimit, &to->qs_bwarnlimit) || put_user(from->qs_iwarnlimit, &to->qs_iwarnlimit)) return -EFAULT; return 0; } static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = quota_getstate(sb, type, &fqs); if (ret) return ret; if (compat_need_64bit_alignment_fixup()) return compat_copy_fs_quota_stat(addr, &fqs); if (copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return 0; } static int quota_getstatev(struct super_block *sb, int type, struct fs_quota_statv *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? */ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; fqs->qs_rtbwarnlimit = state.s_state[type].rt_spc_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } return 0; } static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; memset(&fqs, 0, sizeof(fqs)); if (copy_from_user(&fqs, addr, 1)) /* Just read qs_version */ return -EFAULT; /* If this kernel doesn't support user specified version, fail */ switch (fqs.qs_version) { case FS_QSTATV_VERSION1: break; default: return -EINVAL; } ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } /* * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them * out of there as xfsprogs rely on definitions being in that header file. So * just define same functions here for quota purposes. */ #define XFS_BB_SHIFT 9 static inline u64 quota_bbtob(u64 blocks) { return blocks << XFS_BB_SHIFT; } static inline u64 quota_btobb(u64 bytes) { return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT; } static inline s64 copy_from_xfs_dqblk_ts(const struct fs_disk_quota *d, __s32 timer, __s8 timer_hi) { if (d->d_fieldmask & FS_DQ_BIGTIME) return (u32)timer | (s64)timer_hi << 32; return timer; } static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src) { dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit); dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit); dst->d_ino_hardlimit = src->d_ino_hardlimit; dst->d_ino_softlimit = src->d_ino_softlimit; dst->d_space = quota_bbtob(src->d_bcount); dst->d_ino_count = src->d_icount; dst->d_ino_timer = copy_from_xfs_dqblk_ts(src, src->d_itimer, src->d_itimer_hi); dst->d_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_btimer, src->d_btimer_hi); dst->d_ino_warns = src->d_iwarns; dst->d_spc_warns = src->d_bwarns; dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit); dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit); dst->d_rt_space = quota_bbtob(src->d_rtbcount); dst->d_rt_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_rtbtimer, src->d_rtbtimer_hi); dst->d_rt_spc_warns = src->d_rtbwarns; dst->d_fieldmask = 0; if (src->d_fieldmask & FS_DQ_ISOFT) dst->d_fieldmask |= QC_INO_SOFT; if (src->d_fieldmask & FS_DQ_IHARD) dst->d_fieldmask |= QC_INO_HARD; if (src->d_fieldmask & FS_DQ_BSOFT) dst->d_fieldmask |= QC_SPC_SOFT; if (src->d_fieldmask & FS_DQ_BHARD) dst->d_fieldmask |= QC_SPC_HARD; if (src->d_fieldmask & FS_DQ_RTBSOFT) dst->d_fieldmask |= QC_RT_SPC_SOFT; if (src->d_fieldmask & FS_DQ_RTBHARD) dst->d_fieldmask |= QC_RT_SPC_HARD; if (src->d_fieldmask & FS_DQ_BTIMER) dst->d_fieldmask |= QC_SPC_TIMER; if (src->d_fieldmask & FS_DQ_ITIMER) dst->d_fieldmask |= QC_INO_TIMER; if (src->d_fieldmask & FS_DQ_RTBTIMER) dst->d_fieldmask |= QC_RT_SPC_TIMER; if (src->d_fieldmask & FS_DQ_BWARNS) dst->d_fieldmask |= QC_SPC_WARNS; if (src->d_fieldmask & FS_DQ_IWARNS) dst->d_fieldmask |= QC_INO_WARNS; if (src->d_fieldmask & FS_DQ_RTBWARNS) dst->d_fieldmask |= QC_RT_SPC_WARNS; if (src->d_fieldmask & FS_DQ_BCOUNT) dst->d_fieldmask |= QC_SPACE; if (src->d_fieldmask & FS_DQ_ICOUNT) dst->d_fieldmask |= QC_INO_COUNT; if (src->d_fieldmask & FS_DQ_RTBCOUNT) dst->d_fieldmask |= QC_RT_SPACE; } static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst, struct fs_disk_quota *src) { memset(dst, 0, sizeof(*dst)); dst->i_spc_timelimit = src->d_btimer; dst->i_ino_timelimit = src->d_itimer; dst->i_rt_spc_timelimit = src->d_rtbtimer; dst->i_ino_warnlimit = src->d_iwarns; dst->i_spc_warnlimit = src->d_bwarns; dst->i_rt_spc_warnlimit = src->d_rtbwarns; if (src->d_fieldmask & FS_DQ_BWARNS) dst->i_fieldmask |= QC_SPC_WARNS; if (src->d_fieldmask & FS_DQ_IWARNS) dst->i_fieldmask |= QC_INO_WARNS; if (src->d_fieldmask & FS_DQ_RTBWARNS) dst->i_fieldmask |= QC_RT_SPC_WARNS; if (src->d_fieldmask & FS_DQ_BTIMER) dst->i_fieldmask |= QC_SPC_TIMER; if (src->d_fieldmask & FS_DQ_ITIMER) dst->i_fieldmask |= QC_INO_TIMER; if (src->d_fieldmask & FS_DQ_RTBTIMER) dst->i_fieldmask |= QC_RT_SPC_TIMER; } static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; if (!sb->s_qcop->set_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; /* Are we actually setting timer / warning limits for all users? */ if (from_kqid(sb->s_user_ns, qid) == 0 && fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) { struct qc_info qinfo; int ret; if (!sb->s_qcop->set_info) return -EINVAL; copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq); ret = sb->s_qcop->set_info(sb, type, &qinfo); if (ret) return ret; /* These are already done */ fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK); } copy_from_xfs_dqblk(&qdq, &fdq); return sb->s_qcop->set_dqblk(sb, qid, &qdq); } static inline void copy_to_xfs_dqblk_ts(const struct fs_disk_quota *d, __s32 *timer_lo, __s8 *timer_hi, s64 timer) { *timer_lo = timer; if (d->d_fieldmask & FS_DQ_BIGTIME) *timer_hi = timer >> 32; } static inline bool want_bigtime(s64 timer) { return timer > S32_MAX || timer < S32_MIN; } static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src, int type, qid_t id) { memset(dst, 0, sizeof(*dst)); if (want_bigtime(src->d_ino_timer) || want_bigtime(src->d_spc_timer) || want_bigtime(src->d_rt_spc_timer)) dst->d_fieldmask |= FS_DQ_BIGTIME; dst->d_version = FS_DQUOT_VERSION; dst->d_id = id; if (type == USRQUOTA) dst->d_flags = FS_USER_QUOTA; else if (type == PRJQUOTA) dst->d_flags = FS_PROJ_QUOTA; else dst->d_flags = FS_GROUP_QUOTA; dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit); dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit); dst->d_ino_hardlimit = src->d_ino_hardlimit; dst->d_ino_softlimit = src->d_ino_softlimit; dst->d_bcount = quota_btobb(src->d_space); dst->d_icount = src->d_ino_count; copy_to_xfs_dqblk_ts(dst, &dst->d_itimer, &dst->d_itimer_hi, src->d_ino_timer); copy_to_xfs_dqblk_ts(dst, &dst->d_btimer, &dst->d_btimer_hi, src->d_spc_timer); dst->d_iwarns = src->d_ino_warns; dst->d_bwarns = src->d_spc_warns; dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit); dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit); dst->d_rtbcount = quota_btobb(src->d_rt_space); copy_to_xfs_dqblk_ts(dst, &dst->d_rtbtimer, &dst->d_rtbtimer_hi, src->d_rt_spc_timer); dst->d_rtbwarns = src->d_rt_spc_warns; } static int quota_getxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_dqblk(sb, qid, &qdq); if (ret) return ret; copy_to_xfs_dqblk(&fdq, &qdq, type, id); if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } /* * Return quota for next active quota >= this id, if any exists, * otherwise return -ENOENT via ->get_nextdqblk. */ static int quota_getnextxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; qid_t id_out; int ret; if (!sb->s_qcop->get_nextdqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq); if (ret) return ret; id_out = from_kqid(current_user_ns(), qid); copy_to_xfs_dqblk(&fdq, &qdq, type, id_out); if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } static int quota_rmxquota(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->rm_xquota) return -ENOSYS; return sb->s_qcop->rm_xquota(sb, flags); } /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr, const struct path *path) { int ret; type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. */ if (!sb->s_qcop) return -ENOSYS; if (!(sb->s_quota_types & (1 << type))) return -EINVAL; ret = check_quotactl_permission(sb, type, cmd, id); if (ret < 0) return ret; switch (cmd) { case Q_QUOTAON: return quota_quotaon(sb, type, id, path); case Q_QUOTAOFF: return quota_quotaoff(sb, type); case Q_GETFMT: return quota_getfmt(sb, type, addr); case Q_GETINFO: return quota_getinfo(sb, type, addr); case Q_SETINFO: return quota_setinfo(sb, type, addr); case Q_GETQUOTA: return quota_getquota(sb, type, id, addr); case Q_GETNEXTQUOTA: return quota_getnextquota(sb, type, id, addr); case Q_SETQUOTA: return quota_setquota(sb, type, id, addr); case Q_SYNC: if (!sb->s_qcop->quota_sync) return -ENOSYS; return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: return quota_enable(sb, addr); case Q_XQUOTAOFF: return quota_disable(sb, addr); case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: return quota_getxstate(sb, type, addr); case Q_XGETQSTATV: return quota_getxstatev(sb, type, addr); case Q_XSETQLIM: return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); case Q_XGETNEXTQUOTA: return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: if (sb_rdonly(sb)) return -EROFS; /* XFS quotas are fully coherent now, making this call a noop */ return 0; default: return -EINVAL; } } /* Return 1 if 'cmd' will block on frozen filesystem */ static int quotactl_cmd_write(int cmd) { /* * We cannot allow Q_GETQUOTA and Q_GETNEXTQUOTA without write access * as dquot_acquire() may allocate space for new structure and OCFS2 * needs to increment on-disk use count. */ switch (cmd) { case Q_GETFMT: case Q_GETINFO: case Q_SYNC: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XGETQUOTA: case Q_XGETNEXTQUOTA: case Q_XQUOTASYNC: return 0; } return 1; } /* Return true if quotactl command is manipulating quota on/off state */ static bool quotactl_cmd_onoff(int cmd) { return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) || (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF); } /* * look up a superblock on which quota ops will be performed * - use the name of a block device to find the superblock thereon */ static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK struct super_block *sb; struct filename *tmp = getname(special); bool excl = false, thawed = false; int error; dev_t dev; if (IS_ERR(tmp)) return ERR_CAST(tmp); error = lookup_bdev(tmp->name, &dev); putname(tmp); if (error) return ERR_PTR(error); if (quotactl_cmd_onoff(cmd)) { excl = true; thawed = true; } else if (quotactl_cmd_write(cmd)) { thawed = true; } retry: sb = user_get_super(dev, excl); if (!sb) return ERR_PTR(-ENODEV); if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { if (excl) up_write(&sb->s_umount); else up_read(&sb->s_umount); /* Wait for sb to unfreeze */ sb_start_write(sb); sb_end_write(sb); put_super(sb); goto retry; } return sb; #else return ERR_PTR(-ENODEV); #endif } /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota * calls. Maybe we need to add the process quotas etc. in the future, * but we probably should use rlimits for that. */ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr) { uint cmds, type; struct super_block *sb = NULL; struct path path, *pathp = NULL; int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if (type >= MAXQUOTAS) return -EINVAL; /* * As a special case Q_SYNC can be called without a specific device. * It will iterate all superblocks that have quota enabled and call * the sync action on each of them. */ if (!special) { if (cmds == Q_SYNC) return quota_sync_all(type); return -ENODEV; } /* * Path for quotaon has to be resolved before grabbing superblock * because that gets s_umount sem which is also possibly needed by path * resolution (think about autofs) and thus deadlocks could arise. */ if (cmds == Q_QUOTAON) { ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); if (ret) pathp = ERR_PTR(ret); else pathp = &path; } sb = quotactl_block(special, cmds); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto out; } ret = do_quotactl(sb, type, cmds, id, addr, pathp); if (!quotactl_cmd_onoff(cmds)) drop_super(sb); else drop_super_exclusive(sb); out: if (pathp && !IS_ERR(pathp)) path_put(pathp); return ret; } SYSCALL_DEFINE4(quotactl_fd, unsigned int, fd, unsigned int, cmd, qid_t, id, void __user *, addr) { struct super_block *sb; unsigned int cmds = cmd >> SUBCMDSHIFT; unsigned int type = cmd & SUBCMDMASK; CLASS(fd_raw, f)(fd); int ret; if (fd_empty(f)) return -EBADF; if (type >= MAXQUOTAS) return -EINVAL; if (quotactl_cmd_write(cmds)) { ret = mnt_want_write(fd_file(f)->f_path.mnt); if (ret) return ret; } sb = fd_file(f)->f_path.mnt->mnt_sb; if (quotactl_cmd_onoff(cmds)) down_write(&sb->s_umount); else down_read(&sb->s_umount); ret = do_quotactl(sb, type, cmds, id, addr, ERR_PTR(-EINVAL)); if (quotactl_cmd_onoff(cmds)) up_write(&sb->s_umount); else up_read(&sb->s_umount); if (quotactl_cmd_write(cmds)) mnt_drop_write(fd_file(f)->f_path.mnt); return ret; }
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 // SPDX-License-Identifier: GPL-2.0+ /* * Special Initializers for certain USB Mass Storage devices * * Current development and maintenance by: * (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net) * * This driver is based on the 'USB Mass Storage Class' document. This * describes in detail the protocol used to communicate with such * devices. Clearly, the designers had SCSI and ATAPI commands in * mind when they created this document. The commands are all very * similar to commands in the SCSI-II and ATAPI specifications. * * It is important to note that in a number of cases this class * exhibits class-specific exemptions from the USB specification. * Notably the usage of NAK, STALL and ACK differs from the norm, in * that they are used to communicate wait, failed and OK on commands. * * Also, for certain devices, the interrupt endpoint is used to convey * status of a command. */ #include <linux/errno.h> #include "usb.h" #include "initializers.h" #include "debug.h" #include "transport.h" /* * This places the Shuttle/SCM USB<->SCSI bridge devices in multi-target * mode */ int usb_stor_euscsi_init(struct us_data *us) { int result; usb_stor_dbg(us, "Attempting to init eUSCSI bridge...\n"); result = usb_stor_control_msg(us, us->send_ctrl_pipe, 0x0C, USB_RECIP_INTERFACE | USB_TYPE_VENDOR, 0x01, 0x0, NULL, 0x0, 5 * HZ); usb_stor_dbg(us, "-- result is %d\n", result); return 0; } /* * This function is required to activate all four slots on the UCR-61S2B * flash reader */ int usb_stor_ucr61s2b_init(struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap*) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap*) us->iobuf; int res; unsigned int partial; static char init_string[] = "\xec\x0a\x06\x00$PCCHIPS"; usb_stor_dbg(us, "Sending UCR-61S2B initialization packet...\n"); bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->Tag = 0; bcb->DataTransferLength = cpu_to_le32(0); bcb->Flags = bcb->Lun = 0; bcb->Length = sizeof(init_string) - 1; memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, init_string, sizeof(init_string) - 1); res = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, US_BULK_CB_WRAP_LEN, &partial); if (res) return -EIO; usb_stor_dbg(us, "Getting status packet...\n"); res = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &partial); if (res) return -EIO; return 0; } /* This places the HUAWEI E220 devices in multi-port mode */ int usb_stor_huawei_e220_init(struct us_data *us) { int result; result = usb_stor_control_msg(us, us->send_ctrl_pipe, USB_REQ_SET_FEATURE, USB_TYPE_STANDARD | USB_RECIP_DEVICE, 0x01, 0x0, NULL, 0x0, 1 * HZ); usb_stor_dbg(us, "Huawei mode set result is %d\n", result); return 0; }
5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 /* * User address space access functions. * * For licencing details see kernel-base/COPYING */ #include <linux/uaccess.h> #include <linux/export.h> #include <linux/instrumented.h> #include <asm/tlbflush.h> /** * copy_from_user_nmi - NMI safe copy from user * @to: Pointer to the destination buffer * @from: Pointer to a user space address of the current task * @n: Number of bytes to copy * * Returns: The number of not copied bytes. 0 is success, i.e. all bytes copied * * Contrary to other copy_from_user() variants this function can be called * from NMI context. Despite the name it is not restricted to be called * from NMI context. It is safe to be called from any other context as * well. It disables pagefaults across the copy which means a fault will * abort the copy. * * For NMI context invocations this relies on the nested NMI work to allow * atomic faults from the NMI path; the nested NMI paths are careful to * preserve CR2. */ unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n) { unsigned long ret; if (!__access_ok(from, n)) return n; if (!nmi_uaccess_okay()) return n; /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when * called from other contexts. */ pagefault_disable(); instrument_copy_from_user_before(to, from, n); ret = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, ret); pagefault_enable(); return ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi);
666 668 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 // SPDX-License-Identifier: GPL-2.0 /* * High memory handling common code and variables. * * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de * * * Redesigned the x86 32-bit VM architecture to deal with * 64-bit physical space. With current x86 CPUs this * means up to 64 Gigabytes physical RAM. * * Rewrote high memory support to move the page cache into * high memory. Implemented permanent (schedulable) kmaps * based on Linus' idea. * * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> */ #include <linux/mm.h> #include <linux/export.h> #include <linux/swap.h> #include <linux/bio.h> #include <linux/pagemap.h> #include <linux/mempool.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/highmem.h> #include <linux/kgdb.h> #include <asm/tlbflush.h> #include <linux/vmalloc.h> #ifdef CONFIG_KMAP_LOCAL static inline int kmap_local_calc_idx(int idx) { return idx + KM_MAX_IDX * smp_processor_id(); } #ifndef arch_kmap_local_map_idx #define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx) #endif #endif /* CONFIG_KMAP_LOCAL */ /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped * since a TLB flush - it is usable. * 1 means that there are no users, but it has been mapped * since the last TLB flush - so we can't use it. * n means that there are (n-1) current users of it. */ #ifdef CONFIG_HIGHMEM /* * Architecture with aliasing data cache may define the following family of * helper functions in its asm/highmem.h to control cache color of virtual * addresses where physical memory pages are mapped by kmap. */ #ifndef get_pkmap_color /* * Determine color of virtual address where the page should be mapped. */ static inline unsigned int get_pkmap_color(struct page *page) { return 0; } #define get_pkmap_color get_pkmap_color /* * Get next index for mapping inside PKMAP region for page with given color. */ static inline unsigned int get_next_pkmap_nr(unsigned int color) { static unsigned int last_pkmap_nr; last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; return last_pkmap_nr; } /* * Determine if page index inside PKMAP region (pkmap_nr) of given color * has wrapped around PKMAP region end. When this happens an attempt to * flush all unused PKMAP slots is made. */ static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color) { return pkmap_nr == 0; } /* * Get the number of PKMAP entries of the given color. If no free slot is * found after checking that many entries, kmap will sleep waiting for * someone to call kunmap and free PKMAP slot. */ static inline int get_pkmap_entries_count(unsigned int color) { return LAST_PKMAP; } /* * Get head of a wait queue for PKMAP entries of the given color. * Wait queues for different mapping colors should be independent to avoid * unnecessary wakeups caused by freeing of slots of other colors. */ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) { static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); return &pkmap_map_wait; } #endif unsigned long __nr_free_highpages(void) { unsigned long pages = 0; struct zone *zone; for_each_populated_zone(zone) { if (is_highmem(zone)) pages += zone_page_state(zone, NR_FREE_PAGES); } return pages; } unsigned long __totalhigh_pages(void) { unsigned long pages = 0; struct zone *zone; for_each_populated_zone(zone) { if (is_highmem(zone)) pages += zone_managed_pages(zone); } return pages; } EXPORT_SYMBOL(__totalhigh_pages); static int pkmap_count[LAST_PKMAP]; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); pte_t *pkmap_page_table; /* * Most architectures have no use for kmap_high_get(), so let's abstract * the disabling of IRQ out of the locking in that case to save on a * potential useless overhead. */ #ifdef ARCH_NEEDS_KMAP_HIGH_GET #define lock_kmap() spin_lock_irq(&kmap_lock) #define unlock_kmap() spin_unlock_irq(&kmap_lock) #define lock_kmap_any(flags) spin_lock_irqsave(&kmap_lock, flags) #define unlock_kmap_any(flags) spin_unlock_irqrestore(&kmap_lock, flags) #else #define lock_kmap() spin_lock(&kmap_lock) #define unlock_kmap() spin_unlock(&kmap_lock) #define lock_kmap_any(flags) \ do { spin_lock(&kmap_lock); (void)(flags); } while (0) #define unlock_kmap_any(flags) \ do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif struct page *__kmap_to_page(void *vaddr) { unsigned long base = (unsigned long) vaddr & PAGE_MASK; struct kmap_ctrl *kctrl = &current->kmap_ctrl; unsigned long addr = (unsigned long)vaddr; int i; /* kmap() mappings */ if (WARN_ON_ONCE(addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP))) return pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(addr)])); /* kmap_local_page() mappings */ if (WARN_ON_ONCE(base >= __fix_to_virt(FIX_KMAP_END) && base < __fix_to_virt(FIX_KMAP_BEGIN))) { for (i = 0; i < kctrl->idx; i++) { unsigned long base_addr; int idx; idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); base_addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); if (base_addr == base) return pte_page(kctrl->pteval[i]); } } return virt_to_page(vaddr); } EXPORT_SYMBOL(__kmap_to_page); static void flush_all_zero_pkmaps(void) { int i; int need_flush = 0; flush_cache_kmaps(); for (i = 0; i < LAST_PKMAP; i++) { struct page *page; pte_t ptent; /* * zero means we don't have anything to do, * >1 means that it is still in use. Only * a count of 1 means that it is free but * needs to be unmapped */ if (pkmap_count[i] != 1) continue; pkmap_count[i] = 0; /* sanity check */ ptent = ptep_get(&pkmap_page_table[i]); BUG_ON(pte_none(ptent)); /* * Don't need an atomic fetch-and-clear op here; * no-one has the page mapped, and cannot get at * its virtual address (and hence PTE) without first * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution. */ page = pte_page(ptent); pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]); set_page_address(page, NULL); need_flush = 1; } if (need_flush) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } void __kmap_flush_unused(void) { lock_kmap(); flush_all_zero_pkmaps(); unlock_kmap(); } static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; int count; unsigned int last_pkmap_nr; unsigned int color = get_pkmap_color(page); start: count = get_pkmap_entries_count(color); /* Find an empty entry */ for (;;) { last_pkmap_nr = get_next_pkmap_nr(color); if (no_more_pkmaps(last_pkmap_nr, color)) { flush_all_zero_pkmaps(); count = get_pkmap_entries_count(color); } if (!pkmap_count[last_pkmap_nr]) break; /* Found a usable entry */ if (--count) continue; /* * Sleep for somebody else to unmap their entries */ { DECLARE_WAITQUEUE(wait, current); wait_queue_head_t *pkmap_map_wait = get_pkmap_wait_queue_head(color); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(pkmap_map_wait, &wait); unlock_kmap(); schedule(); remove_wait_queue(pkmap_map_wait, &wait); lock_kmap(); /* Somebody else might have mapped it while we slept */ if (page_address(page)) return (unsigned long)page_address(page); /* Re-start */ goto start; } } vaddr = PKMAP_ADDR(last_pkmap_nr); set_pte_at(&init_mm, vaddr, &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); return vaddr; } /** * kmap_high - map a highmem page into memory * @page: &struct page to map * * Returns the page's virtual memory address. * * We cannot call this from interrupts, as it may block. */ void *kmap_high(struct page *page) { unsigned long vaddr; /* * For highmem pages, we can't trust "virtual" until * after we have the lock. */ lock_kmap(); vaddr = (unsigned long)page_address(page); if (!vaddr) vaddr = map_new_virtual(page); pkmap_count[PKMAP_NR(vaddr)]++; BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); unlock_kmap(); return (void *) vaddr; } EXPORT_SYMBOL(kmap_high); #ifdef ARCH_NEEDS_KMAP_HIGH_GET /** * kmap_high_get - pin a highmem page into memory * @page: &struct page to pin * * Returns the page's current virtual memory address, or NULL if no mapping * exists. If and only if a non null address is returned then a * matching call to kunmap_high() is necessary. * * This can be called from any context. */ void *kmap_high_get(struct page *page) { unsigned long vaddr, flags; lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); if (vaddr) { BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1); pkmap_count[PKMAP_NR(vaddr)]++; } unlock_kmap_any(flags); return (void *) vaddr; } #endif /** * kunmap_high - unmap a highmem page into memory * @page: &struct page to unmap * * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called * only from user context. */ void kunmap_high(struct page *page) { unsigned long vaddr; unsigned long nr; unsigned long flags; int need_wakeup; unsigned int color = get_pkmap_color(page); wait_queue_head_t *pkmap_map_wait; lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); BUG_ON(!vaddr); nr = PKMAP_NR(vaddr); /* * A count must never go down to zero * without a TLB flush! */ need_wakeup = 0; switch (--pkmap_count[nr]) { case 0: BUG(); case 1: /* * Avoid an unnecessary wake_up() function call. * The common case is pkmap_count[] == 1, but * no waiters. * The tasks queued in the wait-queue are guarded * by both the lock in the wait-queue-head and by * the kmap_lock. As the kmap_lock is held here, * no need for the wait-queue-head's lock. Simply * test if the queue is empty. */ pkmap_map_wait = get_pkmap_wait_queue_head(color); need_wakeup = waitqueue_active(pkmap_map_wait); } unlock_kmap_any(flags); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) wake_up(pkmap_map_wait); } EXPORT_SYMBOL(kunmap_high); void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (start1 >= end1) start1 = end1 = 0; if (start2 >= end2) start2 = end2 = 0; for (i = 0; i < compound_nr(page); i++) { void *kaddr = NULL; if (start1 >= PAGE_SIZE) { start1 -= PAGE_SIZE; end1 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); if (end1 > start1) { kaddr = kmap_local_page(page + i); memset(kaddr + start1, 0, this_end - start1); } end1 -= this_end; start1 = 0; } if (start2 >= PAGE_SIZE) { start2 -= PAGE_SIZE; end2 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); if (end2 > start2) { if (!kaddr) kaddr = kmap_local_page(page + i); memset(kaddr + start2, 0, this_end - start2); } end2 -= this_end; start2 = 0; } if (kaddr) { kunmap_local(kaddr); flush_dcache_page(page + i); } if (!end1 && !end2) break; } BUG_ON((start1 | start2 | end1 | end2) != 0); } EXPORT_SYMBOL(zero_user_segments); #endif /* CONFIG_HIGHMEM */ #ifdef CONFIG_KMAP_LOCAL #include <asm/kmap_size.h> /* * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second * slot is unused which acts as a guard page */ #ifdef CONFIG_DEBUG_KMAP_LOCAL # define KM_INCR 2 #else # define KM_INCR 1 #endif static inline int kmap_local_idx_push(void) { WARN_ON_ONCE(in_hardirq() && !irqs_disabled()); current->kmap_ctrl.idx += KM_INCR; BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX); return current->kmap_ctrl.idx - 1; } static inline int kmap_local_idx(void) { return current->kmap_ctrl.idx - 1; } static inline void kmap_local_idx_pop(void) { current->kmap_ctrl.idx -= KM_INCR; BUG_ON(current->kmap_ctrl.idx < 0); } #ifndef arch_kmap_local_post_map # define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) #endif #ifndef arch_kmap_local_pre_unmap # define arch_kmap_local_pre_unmap(vaddr) do { } while (0) #endif #ifndef arch_kmap_local_post_unmap # define arch_kmap_local_post_unmap(vaddr) do { } while (0) #endif #ifndef arch_kmap_local_unmap_idx #define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx) #endif #ifndef arch_kmap_local_high_get static inline void *arch_kmap_local_high_get(struct page *page) { return NULL; } #endif #ifndef arch_kmap_local_set_pte #define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \ set_pte_at(mm, vaddr, ptep, ptev) #endif /* Unmap a local mapping which was obtained by kmap_high_get() */ static inline bool kmap_high_unmap_local(unsigned long vaddr) { #ifdef ARCH_NEEDS_KMAP_HIGH_GET if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { kunmap_high(pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(vaddr)]))); return true; } #endif return false; } static pte_t *__kmap_pte; static pte_t *kmap_get_pte(unsigned long vaddr, int idx) { if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY)) /* * Set by the arch if __kmap_pte[-idx] does not produce * the correct entry. */ return virt_to_kpte(vaddr); if (!__kmap_pte) __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); return &__kmap_pte[-idx]; } void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) { pte_t pteval, *kmap_pte; unsigned long vaddr; int idx; /* * Disable migration so resulting virtual address is stable * across preemption. */ migrate_disable(); preempt_disable(); idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(vaddr, idx); BUG_ON(!pte_none(ptep_get(kmap_pte))); pteval = pfn_pte(pfn, prot); arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval); arch_kmap_local_post_map(vaddr, pteval); current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; preempt_enable(); return (void *)vaddr; } EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot) { void *kmap; /* * To broaden the usage of the actual kmap_local() machinery always map * pages when debugging is enabled and the architecture has no problems * with alias mappings. */ if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page)) return page_address(page); /* Try kmap_high_get() if architecture has it enabled */ kmap = arch_kmap_local_high_get(page); if (kmap) return kmap; return __kmap_local_pfn_prot(page_to_pfn(page), prot); } EXPORT_SYMBOL(__kmap_local_page_prot); void kunmap_local_indexed(const void *vaddr) { unsigned long addr = (unsigned long) vaddr & PAGE_MASK; pte_t *kmap_pte; int idx; if (addr < __fix_to_virt(FIX_KMAP_END) || addr > __fix_to_virt(FIX_KMAP_BEGIN)) { if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) { /* This _should_ never happen! See above. */ WARN_ON_ONCE(1); return; } /* * Handle mappings which were obtained by kmap_high_get() * first as the virtual address of such mappings is below * PAGE_OFFSET. Warn for all other addresses which are in * the user space part of the virtual address space. */ if (!kmap_high_unmap_local(addr)) WARN_ON_ONCE(addr < PAGE_OFFSET); return; } preempt_disable(); idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr); WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); kmap_pte = kmap_get_pte(addr, idx); arch_kmap_local_pre_unmap(addr); pte_clear(&init_mm, addr, kmap_pte); arch_kmap_local_post_unmap(addr); current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0); kmap_local_idx_pop(); preempt_enable(); migrate_enable(); } EXPORT_SYMBOL(kunmap_local_indexed); /* * Invoked before switch_to(). This is safe even when during or after * clearing the maps an interrupt which needs a kmap_local happens because * the task::kmap_ctrl.idx is not modified by the unmapping code so a * nested kmap_local will use the next unused index and restore the index * on unmap. The already cleared kmaps of the outgoing task are irrelevant * because the interrupt context does not know about them. The same applies * when scheduling back in for an interrupt which happens before the * restore is complete. */ void __kmap_local_sched_out(void) { struct task_struct *tsk = current; pte_t *kmap_pte; int i; /* Clear kmaps */ for (i = 0; i < tsk->kmap_ctrl.idx; i++) { pte_t pteval = tsk->kmap_ctrl.pteval[i]; unsigned long addr; int idx; /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) continue; /* * This is a horrible hack for XTENSA to calculate the * coloured PTE index. Uses the PFN encoded into the pteval * and the map index calculation because the actual mapped * virtual address is not stored in task::kmap_ctrl. * For any sane architecture this is optimized out. */ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(addr, idx); arch_kmap_local_pre_unmap(addr); pte_clear(&init_mm, addr, kmap_pte); arch_kmap_local_post_unmap(addr); } } void __kmap_local_sched_in(void) { struct task_struct *tsk = current; pte_t *kmap_pte; int i; /* Restore kmaps */ for (i = 0; i < tsk->kmap_ctrl.idx; i++) { pte_t pteval = tsk->kmap_ctrl.pteval[i]; unsigned long addr; int idx; /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) continue; /* See comment in __kmap_local_sched_out() */ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(addr, idx); set_pte_at(&init_mm, addr, kmap_pte, pteval); arch_kmap_local_post_map(addr, pteval); } } void kmap_local_fork(struct task_struct *tsk) { if (WARN_ON_ONCE(tsk->kmap_ctrl.idx)) memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl)); } #endif #if defined(HASHED_PAGE_VIRTUAL) #define PA_HASH_ORDER 7 /* * Describes one page->virtual association */ struct page_address_map { struct page *page; void *virtual; struct list_head list; }; static struct page_address_map page_address_maps[LAST_PKMAP]; /* * Hash table bucket */ static struct page_address_slot { struct list_head lh; /* List of page_address_maps */ spinlock_t lock; /* Protect this bucket's list */ } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; static struct page_address_slot *page_slot(const struct page *page) { return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; } /** * page_address - get the mapped virtual address of a page * @page: &struct page to get the virtual address of * * Returns the page's virtual address. */ void *page_address(const struct page *page) { unsigned long flags; void *ret; struct page_address_slot *pas; if (!PageHighMem(page)) return lowmem_page_address(page); pas = page_slot(page); ret = NULL; spin_lock_irqsave(&pas->lock, flags); if (!list_empty(&pas->lh)) { struct page_address_map *pam; list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { ret = pam->virtual; break; } } } spin_unlock_irqrestore(&pas->lock, flags); return ret; } EXPORT_SYMBOL(page_address); /** * set_page_address - set a page's virtual address * @page: &struct page to set * @virtual: virtual address to use */ void set_page_address(struct page *page, void *virtual) { unsigned long flags; struct page_address_slot *pas; struct page_address_map *pam; BUG_ON(!PageHighMem(page)); pas = page_slot(page); if (virtual) { /* Add */ pam = &page_address_maps[PKMAP_NR((unsigned long)virtual)]; pam->page = page; pam->virtual = virtual; spin_lock_irqsave(&pas->lock, flags); list_add_tail(&pam->list, &pas->lh); spin_unlock_irqrestore(&pas->lock, flags); } else { /* Remove */ spin_lock_irqsave(&pas->lock, flags); list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { list_del(&pam->list); break; } } spin_unlock_irqrestore(&pas->lock, flags); } } void __init page_address_init(void) { int i; for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) { INIT_LIST_HEAD(&page_address_htable[i].lh); spin_lock_init(&page_address_htable[i].lock); } } #endif /* defined(HASHED_PAGE_VIRTUAL) */
2 14 83 1412 1413 1411 1411 449 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * vma.h * * Core VMA manipulation API implemented in vma.c. */ #ifndef __MM_VMA_H #define __MM_VMA_H /* * VMA lock generalization */ struct vma_prepare { struct vm_area_struct *vma; struct vm_area_struct *adj_next; struct file *file; struct address_space *mapping; struct anon_vma *anon_vma; struct vm_area_struct *insert; struct vm_area_struct *remove; struct vm_area_struct *remove2; }; struct unlink_vma_file_batch { int count; struct vm_area_struct *vmas[8]; }; /* * vma munmap operation */ struct vma_munmap_struct { struct vma_iterator *vmi; struct vm_area_struct *vma; /* The first vma to munmap */ struct vm_area_struct *prev; /* vma before the munmap area */ struct vm_area_struct *next; /* vma after the munmap area */ struct list_head *uf; /* Userfaultfd list_head */ unsigned long start; /* Aligned start addr (inclusive) */ unsigned long end; /* Aligned end addr (exclusive) */ unsigned long unmap_start; /* Unmap PTE start */ unsigned long unmap_end; /* Unmap PTE end */ int vma_count; /* Number of vmas that will be removed */ bool unlock; /* Unlock after the munmap */ bool clear_ptes; /* If there are outstanding PTE to be cleared */ /* 2 byte hole */ unsigned long nr_pages; /* Number of pages being removed */ unsigned long locked_vm; /* Number of locked pages */ unsigned long nr_accounted; /* Number of VM_ACCOUNT pages */ unsigned long exec_vm; unsigned long stack_vm; unsigned long data_vm; }; enum vma_merge_state { VMA_MERGE_START, VMA_MERGE_ERROR_NOMEM, VMA_MERGE_NOMERGE, VMA_MERGE_SUCCESS, }; enum vma_merge_flags { VMG_FLAG_DEFAULT = 0, /* * If we can expand, simply do so. We know there is nothing to merge to * the right. Does not reset state upon failure to merge. The VMA * iterator is assumed to be positioned at the previous VMA, rather than * at the gap. */ VMG_FLAG_JUST_EXPAND = 1 << 0, }; /* Represents a VMA merge operation. */ struct vma_merge_struct { struct mm_struct *mm; struct vma_iterator *vmi; pgoff_t pgoff; struct vm_area_struct *prev; struct vm_area_struct *next; /* Modified by vma_merge(). */ struct vm_area_struct *vma; /* Either a new VMA or the one being modified. */ unsigned long start; unsigned long end; unsigned long flags; struct file *file; struct anon_vma *anon_vma; struct mempolicy *policy; struct vm_userfaultfd_ctx uffd_ctx; struct anon_vma_name *anon_name; enum vma_merge_flags merge_flags; enum vma_merge_state state; }; static inline bool vmg_nomem(struct vma_merge_struct *vmg) { return vmg->state == VMA_MERGE_ERROR_NOMEM; } /* Assumes addr >= vma->vm_start. */ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma, unsigned long addr) { return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start); } #define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_) \ struct vma_merge_struct name = { \ .mm = mm_, \ .vmi = vmi_, \ .start = start_, \ .end = end_, \ .flags = flags_, \ .pgoff = pgoff_, \ .state = VMA_MERGE_START, \ .merge_flags = VMG_FLAG_DEFAULT, \ } #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \ struct vma_merge_struct name = { \ .mm = vma_->vm_mm, \ .vmi = vmi_, \ .prev = prev_, \ .next = NULL, \ .vma = vma_, \ .start = start_, \ .end = end_, \ .flags = vma_->vm_flags, \ .pgoff = vma_pgoff_offset(vma_, start_), \ .file = vma_->vm_file, \ .anon_vma = vma_->anon_vma, \ .policy = vma_policy(vma_), \ .uffd_ctx = vma_->vm_userfaultfd_ctx, \ .anon_name = anon_vma_name(vma_), \ .state = VMA_MERGE_START, \ .merge_flags = VMG_FLAG_DEFAULT, \ } #ifdef CONFIG_DEBUG_VM_MAPLE_TREE void validate_mm(struct mm_struct *mm); #else #define validate_mm(mm) do { } while (0) #endif /* Required for expand_downwards(). */ void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma); /* Required for expand_downwards(). */ void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma); int vma_expand(struct vma_merge_struct *vmg); int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff); static inline int vma_iter_store_gfp(struct vma_iterator *vmi, struct vm_area_struct *vma, gfp_t gfp) { if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_gfp(&vmi->mas, vma, gfp); if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; return 0; } int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end, struct list_head *uf, bool unlock); int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); void remove_vma(struct vm_area_struct *vma, bool unreachable); void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, struct vm_area_struct *next); /* We are about to modify the VMA's flags. */ struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags); /* We are about to modify the VMA's flags and/or anon_name. */ struct vm_area_struct *vma_modify_flags_name(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, struct anon_vma_name *new_name); /* We are about to modify the VMA's memory policy. */ struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct mempolicy *new_pol); /* We are about to modify the VMA's flags and/or uffd context. */ struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, struct vm_userfaultfd_ctx new_ctx); struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg); struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long delta); void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb); void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb); void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb, struct vm_area_struct *vma); void unlink_file_vma(struct vm_area_struct *vma); void vma_link_file(struct vm_area_struct *vma); int vma_link(struct mm_struct *mm, struct vm_area_struct *vma); struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma); bool vma_needs_dirty_tracking(struct vm_area_struct *vma); bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); int mm_take_all_locks(struct mm_struct *mm); void mm_drop_all_locks(struct mm_struct *mm); unsigned long __mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf); static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) { /* * We want to check manually if we can change individual PTEs writable * if we can't do that automatically for all PTEs in a mapping. For * private mappings, that's always the case when we have write * permissions as we properly have to handle COW. */ if (vma->vm_flags & VM_SHARED) return vma_wants_writenotify(vma, vma->vm_page_prot); return !!(vma->vm_flags & VM_WRITE); } #ifdef CONFIG_MMU static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) { return pgprot_modify(oldprot, vm_get_page_prot(vm_flags)); } #endif static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi, unsigned long min) { return mas_prev(&vmi->mas, min); } /* * These three helpers classifies VMAs for virtual memory accounting. */ /* * Executable code area - executable, not writable, not stack */ static inline bool is_exec_mapping(vm_flags_t flags) { return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; } /* * Stack area (including shadow stacks) * * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: * do_mmap() forbids all other combinations. */ static inline bool is_stack_mapping(vm_flags_t flags) { return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK); } /* * Data area - private, writable, not stack */ static inline bool is_data_mapping(vm_flags_t flags) { return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; } static inline void vma_iter_config(struct vma_iterator *vmi, unsigned long index, unsigned long last) { __mas_set_range(&vmi->mas, index, last - 1); } static inline void vma_iter_reset(struct vma_iterator *vmi) { mas_reset(&vmi->mas); } static inline struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min) { return mas_prev_range(&vmi->mas, min); } static inline struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max) { return mas_next_range(&vmi->mas, max); } static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min, unsigned long max, unsigned long size) { return mas_empty_area(&vmi->mas, min, max - 1, size); } static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min, unsigned long max, unsigned long size) { return mas_empty_area_rev(&vmi->mas, min, max - 1, size); } /* * VMA Iterator functions shared between nommu and mmap */ static inline int vma_iter_prealloc(struct vma_iterator *vmi, struct vm_area_struct *vma) { return mas_preallocate(&vmi->mas, vma, GFP_KERNEL); } static inline void vma_iter_clear(struct vma_iterator *vmi) { mas_store_prealloc(&vmi->mas, NULL); } static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi) { return mas_walk(&vmi->mas); } /* Store a VMA with preallocated memory */ static inline void vma_iter_store(struct vma_iterator *vmi, struct vm_area_struct *vma) { #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.index > vma->vm_start)) { pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n", vmi->mas.index, vma->vm_start, vma->vm_start, vma->vm_end, vmi->mas.index, vmi->mas.last); } if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.last < vma->vm_start)) { pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end, vmi->mas.index, vmi->mas.last); } #endif if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_prealloc(&vmi->mas, vma); } static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) { return vmi->mas.index; } static inline unsigned long vma_iter_end(struct vma_iterator *vmi) { return vmi->mas.last + 1; } static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, unsigned long count) { return mas_expected_entries(&vmi->mas, count); } static inline struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) { return mas_prev_range(&vmi->mas, 0); } /* * Retrieve the next VMA and rewind the iterator to end of the previous VMA, or * if no previous VMA, to index 0. */ static inline struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi, struct vm_area_struct **pprev) { struct vm_area_struct *next = vma_next(vmi); struct vm_area_struct *prev = vma_prev(vmi); /* * Consider the case where no previous VMA exists. We advance to the * next VMA, skipping any gap, then rewind to the start of the range. * * If we were to unconditionally advance to the next range we'd wind up * at the next VMA again, so we check to ensure there is a previous VMA * to skip over. */ if (prev) vma_iter_next_range(vmi); if (pprev) *pprev = prev; return next; } #ifdef CONFIG_64BIT static inline bool vma_is_sealed(struct vm_area_struct *vma) { return (vma->vm_flags & VM_SEALED); } /* * check if a vma is sealed for modification. * return true, if modification is allowed. */ static inline bool can_modify_vma(struct vm_area_struct *vma) { if (unlikely(vma_is_sealed(vma))) return false; return true; } bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior); #else static inline bool can_modify_vma(struct vm_area_struct *vma) { return true; } static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior) { return true; } #endif #endif /* __MM_VMA_H */
73 102 1 1 1 109 108 1 83 2 80 2 191 193 107 1 11 47 1 26 6 48 201 5 200 201 2 3 188 204 40 166 201 75 36 88 86 2 40 48 2 3 1 4 4 2 2 1 1 1 1 108 3 7 53 49 4 54 37 17 24 4 121 29 29 29 29 24 24 24 24 2 1 1 25 26 51 51 25 2 1 4 22 26 26 42 42 3 15 15 38 23 186 2 184 185 34 147 3 2 4 171 107 106 1 8 98 65 1 61 28 24 39 42 106 92 49 1 79 6 75 56 50 50 28 4 19 17 17 34 27 17 73 72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/blk-mq.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/fsnotify.h> #include <linux/poll.h> #include <linux/nospec.h> #include <linux/compat.h> #include <linux/io_uring/cmd.h> #include <linux/indirect_call_wrapper.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "opdef.h" #include "kbuf.h" #include "alloc_cache.h" #include "rsrc.h" #include "poll.h" #include "rw.h" struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; u64 addr; u32 len; rwf_t flags; }; static bool io_file_supports_nowait(struct io_kiocb *req, __poll_t mask) { /* If FMODE_NOWAIT is set for a file, we're golden */ if (req->flags & REQ_F_SUPPORT_NOWAIT) return true; /* No FMODE_NOWAIT, if we can poll, check the status */ if (io_file_can_poll(req)) { struct poll_table_struct pt = { ._key = mask }; return vfs_poll(req->file, &pt) & mask; } /* No FMODE_NOWAIT support, and file isn't pollable. Tough luck. */ return false; } #ifdef CONFIG_COMPAT static int io_iov_compat_buffer_select_prep(struct io_rw *rw) { struct compat_iovec __user *uiov; compat_ssize_t clen; uiov = u64_to_user_ptr(rw->addr); if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(clen, &uiov->iov_len)) return -EFAULT; if (clen < 0) return -EINVAL; rw->len = clen; return 0; } #endif static int io_iov_buffer_select_prep(struct io_kiocb *req) { struct iovec __user *uiov; struct iovec iov; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); if (rw->len != 1) return -EINVAL; #ifdef CONFIG_COMPAT if (req->ctx->compat) return io_iov_compat_buffer_select_prep(rw); #endif uiov = u64_to_user_ptr(rw->addr); if (copy_from_user(&iov, uiov, sizeof(*uiov))) return -EFAULT; rw->len = iov.iov_len; return 0; } static int __io_import_iovec(int ddir, struct io_kiocb *req, struct io_async_rw *io, unsigned int issue_flags) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct iovec *iov; void __user *buf; int nr_segs, ret; size_t sqe_len; buf = u64_to_user_ptr(rw->addr); sqe_len = rw->len; if (!def->vectored || req->flags & REQ_F_BUFFER_SELECT) { if (io_do_buffer_select(req)) { buf = io_buffer_select(req, &sqe_len, issue_flags); if (!buf) return -ENOBUFS; rw->addr = (unsigned long) buf; rw->len = sqe_len; } return import_ubuf(ddir, buf, sqe_len, &io->iter); } if (io->free_iovec) { nr_segs = io->free_iov_nr; iov = io->free_iovec; } else { iov = &io->fast_iov; nr_segs = 1; } ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter, req->ctx->compat); if (unlikely(ret < 0)) return ret; if (iov) { req->flags |= REQ_F_NEED_CLEANUP; io->free_iov_nr = io->iter.nr_segs; kfree(io->free_iovec); io->free_iovec = iov; } return 0; } static inline int io_import_iovec(int rw, struct io_kiocb *req, struct io_async_rw *io, unsigned int issue_flags) { int ret; ret = __io_import_iovec(rw, req, io, issue_flags); if (unlikely(ret < 0)) return ret; iov_iter_save_state(&io->iter, &io->iter_state); return 0; } static void io_rw_iovec_free(struct io_async_rw *rw) { if (rw->free_iovec) { kfree(rw->free_iovec); rw->free_iov_nr = 0; rw->free_iovec = NULL; } } static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_rw *rw = req->async_data; struct iovec *iov; if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { io_rw_iovec_free(rw); return; } iov = rw->free_iovec; if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { if (iov) kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } } static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) { /* * Disable quick recycling for anything that's gone through io-wq. * In theory, this should be fine to cleanup. However, some read or * write iter handling touches the iovec AFTER having called into the * handler, eg to reexpand or revert. This means we can have: * * task io-wq * issue * punt to io-wq * issue * blkdev_write_iter() * ->ki_complete() * io_complete_rw() * queue tw complete * run tw * req_rw_cleanup * iov_iter_count() <- look at iov_iter again * * which can lead to a UAF. This is only possible for io-wq offload * as the cleanup can run in parallel. As io-wq is not the fast path, * just leave cleanup to the end. * * This is really a bug in the core code that does this, any issue * path should assume that a successful (or -EIOCBQUEUED) return can * mean that the underlying data can be gone at any time. But that * should be fixed seperately, and then this check could be killed. */ if (!(req->flags & REQ_F_REFCOUNT)) { req->flags &= ~REQ_F_NEED_CLEANUP; io_rw_recycle(req, issue_flags); } } static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; rw = io_alloc_cache_get(&ctx->rw_cache); if (rw) { if (rw->free_iovec) { kasan_mempool_unpoison_object(rw->free_iovec, rw->free_iov_nr * sizeof(struct iovec)); req->flags |= REQ_F_NEED_CLEANUP; } req->flags |= REQ_F_ASYNC_DATA; req->async_data = rw; goto done; } if (!io_alloc_async_data(req)) { rw = req->async_data; rw->free_iovec = NULL; rw->free_iov_nr = 0; done: rw->bytes_done = 0; return 0; } return -ENOMEM; } static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import) { struct io_async_rw *rw; int ret; if (io_rw_alloc_async(req)) return -ENOMEM; if (!do_import || io_do_buffer_select(req)) return 0; rw = req->async_data; ret = io_import_iovec(ddir, req, rw, 0); if (unlikely(ret < 0)) return ret; iov_iter_save_state(&rw->iter, &rw->iter_state); return 0; } static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir, bool do_import) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned ioprio; int ret; rw->kiocb.ki_pos = READ_ONCE(sqe->off); /* used for fixed read/write too - just read unconditionally */ req->buf_index = READ_ONCE(sqe->buf_index); ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { ret = ioprio_check_cap(ioprio); if (ret) return ret; rw->kiocb.ki_ioprio = ioprio; } else { rw->kiocb.ki_ioprio = get_current_ioprio(); } rw->kiocb.dio_complete = NULL; rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); return io_prep_rw_setup(req, ddir, do_import); } int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rw(req, sqe, ITER_DEST, true); } int io_prep_write(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rw(req, sqe, ITER_SOURCE, true); } static int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir) { const bool do_import = !(req->flags & REQ_F_BUFFER_SELECT); int ret; ret = io_prep_rw(req, sqe, ddir, do_import); if (unlikely(ret)) return ret; if (do_import) return 0; /* * Have to do this validation here, as this is in io_read() rw->len * might have chanaged due to buffer selection */ return io_iov_buffer_select_prep(req); } int io_prep_readv(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rwv(req, sqe, ITER_DEST); } int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rwv(req, sqe, ITER_SOURCE); } static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_ring_ctx *ctx = req->ctx; struct io_rsrc_node *node; struct io_async_rw *io; int ret; ret = io_prep_rw(req, sqe, ddir, false); if (unlikely(ret)) return ret; node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index); if (!node) return -EFAULT; io_req_assign_buf_node(req, node); io = req->async_data; ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len); iov_iter_save_state(&io->iter, &io->iter_state); return ret; } int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rw_fixed(req, sqe, ITER_DEST); } int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) { return io_prep_rw_fixed(req, sqe, ITER_SOURCE); } /* * Multishot read is prepared just like a normal read/write request, only * difference is that we set the MULTISHOT flag. */ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); int ret; /* must be used with provided buffers */ if (!(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; ret = io_prep_rw(req, sqe, ITER_DEST, false); if (unlikely(ret)) return ret; if (rw->addr || rw->len) return -EINVAL; req->flags |= REQ_F_APOLL_MULTISHOT; return 0; } void io_readv_writev_cleanup(struct io_kiocb *req) { io_rw_iovec_free(req->async_data); } static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); if (rw->kiocb.ki_pos != -1) return &rw->kiocb.ki_pos; if (!(req->file->f_mode & FMODE_STREAM)) { req->flags |= REQ_F_CUR_POS; rw->kiocb.ki_pos = req->file->f_pos; return &rw->kiocb.ki_pos; } rw->kiocb.ki_pos = 0; return NULL; } #ifdef CONFIG_BLOCK static void io_resubmit_prep(struct io_kiocb *req) { struct io_async_rw *io = req->async_data; iov_iter_restore(&io->iter, &io->iter_state); } static bool io_rw_should_reissue(struct io_kiocb *req) { umode_t mode = file_inode(req->file)->i_mode; struct io_ring_ctx *ctx = req->ctx; if (!S_ISBLK(mode) && !S_ISREG(mode)) return false; if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && !(ctx->flags & IORING_SETUP_IOPOLL))) return false; /* * If ref is dying, we might be running poll reap from the exit work. * Don't attempt to reissue from that path, just let it fail with * -EAGAIN. */ if (percpu_ref_is_dying(&ctx->refs)) return false; /* * Play it safe and assume not safe to re-import and reissue if we're * not in the original thread group (or in task context). */ if (!same_thread_group(req->tctx->task, current) || !in_task()) return false; return true; } #else static void io_resubmit_prep(struct io_kiocb *req) { } static bool io_rw_should_reissue(struct io_kiocb *req) { return false; } #endif static void io_req_end_write(struct io_kiocb *req) { if (req->flags & REQ_F_ISREG) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); kiocb_end_write(&rw->kiocb); } } /* * Trigger the notifications after having done some IO, and finish the write * accounting, if any. */ static void io_req_io_end(struct io_kiocb *req) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); if (rw->kiocb.ki_flags & IOCB_WRITE) { io_req_end_write(req); fsnotify_modify(req->file); } else { fsnotify_access(req->file); } } static bool __io_complete_rw_common(struct io_kiocb *req, long res) { if (unlikely(res != req->cqe.res)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { /* * Reissue will start accounting again, finish the * current cycle. */ io_req_io_end(req); req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; return true; } req_set_fail(req); req->cqe.res = res; } return false; } static inline int io_fixup_rw_res(struct io_kiocb *req, long res) { struct io_async_rw *io = req->async_data; /* add previously done IO, if any */ if (req_has_async_data(req) && io->bytes_done > 0) { if (res < 0) res = io->bytes_done; else res += io->bytes_done; } return res; } void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct kiocb *kiocb = &rw->kiocb; if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) { long res = kiocb->dio_complete(rw->kiocb.private); io_req_set_res(req, io_fixup_rw_res(req, res), 0); } io_req_io_end(req); if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) req->cqe.flags |= io_put_kbuf(req, req->cqe.res, 0); io_req_rw_cleanup(req, 0); io_req_task_complete(req, ts); } static void io_complete_rw(struct kiocb *kiocb, long res) { struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb); struct io_kiocb *req = cmd_to_io_kiocb(rw); if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) { if (__io_complete_rw_common(req, res)) return; io_req_set_res(req, io_fixup_rw_res(req, res), 0); } req->io_task_work.func = io_req_rw_complete; __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE); } static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) { struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb); struct io_kiocb *req = cmd_to_io_kiocb(rw); if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); if (unlikely(res != req->cqe.res)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; return; } req->cqe.res = res; } /* order with io_iopoll_complete() checking ->iopoll_completed */ smp_store_release(&req->iopoll_completed, 1); } static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) { /* IO was queued async, completion will happen later */ if (ret == -EIOCBQUEUED) return; /* transform internal restart error codes */ if (unlikely(ret < 0)) { switch (ret) { case -ERESTARTSYS: case -ERESTARTNOINTR: case -ERESTARTNOHAND: case -ERESTART_RESTARTBLOCK: /* * We can't just restart the syscall, since previously * submitted sqes may already be in progress. Just fail * this IO with EINTR. */ ret = -EINTR; break; } } INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll, io_complete_rw, kiocb, ret); } static int kiocb_done(struct io_kiocb *req, ssize_t ret, unsigned int issue_flags) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned final_ret = io_fixup_rw_res(req, ret); if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { if (!__io_complete_rw_common(req, ret)) { /* * Safe to call io_end from here as we're inline * from the submission path. */ io_req_io_end(req); io_req_set_res(req, final_ret, io_put_kbuf(req, ret, issue_flags)); io_req_rw_cleanup(req, issue_flags); return IOU_OK; } } else { io_rw_done(&rw->kiocb, ret); } if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; io_resubmit_prep(req); return -EAGAIN; } return IOU_ISSUE_SKIP_COMPLETE; } static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) { return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; } /* * For files that don't have ->read_iter() and ->write_iter(), handle them * by looping over ->read() or ->write() manually. */ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter) { struct kiocb *kiocb = &rw->kiocb; struct file *file = kiocb->ki_filp; ssize_t ret = 0; loff_t *ppos; /* * Don't support polled IO through this interface, and we can't * support non-blocking either. For the latter, this just causes * the kiocb to be handled from an async context. */ if (kiocb->ki_flags & IOCB_HIPRI) return -EOPNOTSUPP; if ((kiocb->ki_flags & IOCB_NOWAIT) && !(kiocb->ki_filp->f_flags & O_NONBLOCK)) return -EAGAIN; ppos = io_kiocb_ppos(kiocb); while (iov_iter_count(iter)) { void __user *addr; size_t len; ssize_t nr; if (iter_is_ubuf(iter)) { addr = iter->ubuf + iter->iov_offset; len = iov_iter_count(iter); } else if (!iov_iter_is_bvec(iter)) { addr = iter_iov_addr(iter); len = iter_iov_len(iter); } else { addr = u64_to_user_ptr(rw->addr); len = rw->len; } if (ddir == READ) nr = file->f_op->read(file, addr, len, ppos); else nr = file->f_op->write(file, addr, len, ppos); if (nr < 0) { if (!ret) ret = nr; break; } ret += nr; if (!iov_iter_is_bvec(iter)) { iov_iter_advance(iter, nr); } else { rw->addr += nr; rw->len -= nr; if (!rw->len) break; } if (nr != len) break; } return ret; } /* * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. * This gets called when the page is unlocked, and we generally expect that to * happen when the page IO is completed and the page is now uptodate. This will * queue a task_work based retry of the operation, attempting to copy the data * again. If the latter fails because the page was NOT uptodate, then we will * do a thread based blocking retry of the operation. That's the unexpected * slow path. */ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, int sync, void *arg) { struct wait_page_queue *wpq; struct io_kiocb *req = wait->private; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct wait_page_key *key = arg; wpq = container_of(wait, struct wait_page_queue, wait); if (!wake_page_match(wpq, key)) return 0; rw->kiocb.ki_flags &= ~IOCB_WAITQ; list_del_init(&wait->entry); io_req_task_queue(req); return 1; } /* * This controls whether a given IO request should be armed for async page * based retry. If we return false here, the request is handed to the async * worker threads for retry. If we're doing buffered reads on a regular file, * we prepare a private wait_page_queue entry and retry the operation. This * will either succeed because the page is now uptodate and unlocked, or it * will register a callback when the page is unlocked at IO completion. Through * that callback, io_uring uses task_work to setup a retry of the operation. * That retry will attempt the buffered read again. The retry will generally * succeed, or in rare cases where it fails, we then fall back to using the * async worker threads for a blocking retry. */ static bool io_rw_should_retry(struct io_kiocb *req) { struct io_async_rw *io = req->async_data; struct wait_page_queue *wait = &io->wpq; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct kiocb *kiocb = &rw->kiocb; /* never retry for NOWAIT, we just complete with -EAGAIN */ if (req->flags & REQ_F_NOWAIT) return false; /* Only for buffered IO */ if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) return false; /* * just use poll if we can, and don't attempt if the fs doesn't * support callback based unlocks */ if (io_file_can_poll(req) || !(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC)) return false; wait->wait.func = io_async_buf_func; wait->wait.private = req; wait->wait.flags = 0; INIT_LIST_HEAD(&wait->wait.entry); kiocb->ki_flags |= IOCB_WAITQ; kiocb->ki_flags &= ~IOCB_NOWAIT; kiocb->ki_waitq = wait; return true; } static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter) { struct file *file = rw->kiocb.ki_filp; if (likely(file->f_op->read_iter)) return file->f_op->read_iter(&rw->kiocb, iter); else if (file->f_op->read) return loop_rw_iter(READ, rw, iter); else return -EINVAL; } static bool need_complete_io(struct io_kiocb *req) { return req->flags & REQ_F_ISREG || S_ISBLK(file_inode(req->file)->i_mode); } static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct kiocb *kiocb = &rw->kiocb; struct io_ring_ctx *ctx = req->ctx; struct file *file = req->file; int ret; if (unlikely(!(file->f_mode & mode))) return -EBADF; if (!(req->flags & REQ_F_FIXED_FILE)) req->flags |= io_file_get_flags(file); kiocb->ki_flags = file->f_iocb_flags; ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); if (unlikely(ret)) return ret; kiocb->ki_flags |= IOCB_ALLOC_CACHE; /* * If the file is marked O_NONBLOCK, still allow retry for it if it * supports async. Otherwise it's impossible to use O_NONBLOCK files * reliably. If not, or it IOCB_NOWAIT is set, don't retry. */ if (kiocb->ki_flags & IOCB_NOWAIT || ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT)))) req->flags |= REQ_F_NOWAIT; if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { /* make sure every req only blocks once*/ req->flags &= ~REQ_F_IOPOLL_STATE; req->iopoll_start = ktime_get_ns(); } } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; kiocb->ki_complete = io_complete_rw; } return 0; } static int __io_read(struct io_kiocb *req, unsigned int issue_flags) { bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_async_rw *io = req->async_data; struct kiocb *kiocb = &rw->kiocb; ssize_t ret; loff_t *ppos; if (io_do_buffer_select(req)) { ret = io_import_iovec(ITER_DEST, req, io, issue_flags); if (unlikely(ret < 0)) return ret; } ret = io_rw_init_file(req, FMODE_READ, READ); if (unlikely(ret)) return ret; req->cqe.res = iov_iter_count(&io->iter); if (force_nonblock) { /* If the file doesn't support async, just async punt */ if (unlikely(!io_file_supports_nowait(req, EPOLLIN))) return -EAGAIN; kiocb->ki_flags |= IOCB_NOWAIT; } else { /* Ensure we clear previously set non-block flag */ kiocb->ki_flags &= ~IOCB_NOWAIT; } ppos = io_kiocb_update_pos(req); ret = rw_verify_area(READ, req->file, ppos, req->cqe.res); if (unlikely(ret)) return ret; ret = io_iter_do_read(rw, &io->iter); /* * Some file systems like to return -EOPNOTSUPP for an IOCB_NOWAIT * issue, even though they should be returning -EAGAIN. To be safe, * retry from blocking context for either. */ if (ret == -EOPNOTSUPP && force_nonblock) ret = -EAGAIN; if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { req->flags &= ~REQ_F_REISSUE; /* If we can poll, just do that. */ if (io_file_can_poll(req)) return -EAGAIN; /* IOPOLL retry should happen for io-wq threads */ if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) goto done; /* no retry on NONBLOCK nor RWF_NOWAIT */ if (req->flags & REQ_F_NOWAIT) goto done; ret = 0; } else if (ret == -EIOCBQUEUED) { return IOU_ISSUE_SKIP_COMPLETE; } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } /* * Don't depend on the iter state matching what was consumed, or being * untouched in case of error. Restore it and we'll advance it * manually if we need to. */ iov_iter_restore(&io->iter, &io->iter_state); do { /* * We end up here because of a partial read, either from * above or inside this loop. Advance the iter by the bytes * that were consumed. */ iov_iter_advance(&io->iter, ret); if (!iov_iter_count(&io->iter)) break; io->bytes_done += ret; iov_iter_save_state(&io->iter, &io->iter_state); /* if we can retry, do so with the callbacks armed */ if (!io_rw_should_retry(req)) { kiocb->ki_flags &= ~IOCB_WAITQ; return -EAGAIN; } req->cqe.res = iov_iter_count(&io->iter); /* * Now retry read with the IOCB_WAITQ parts set in the iocb. If * we get -EIOCBQUEUED, then we'll get a notification when the * desired page gets unlocked. We can also get a partial read * here, and if we do, then just retry at the new offset. */ ret = io_iter_do_read(rw, &io->iter); if (ret == -EIOCBQUEUED) return IOU_ISSUE_SKIP_COMPLETE; /* we got some bytes, but not all. retry. */ kiocb->ki_flags &= ~IOCB_WAITQ; iov_iter_restore(&io->iter, &io->iter_state); } while (ret > 0); done: /* it's faster to check here then delegate to kfree */ return ret; } int io_read(struct io_kiocb *req, unsigned int issue_flags) { int ret; ret = __io_read(req, issue_flags); if (ret >= 0) return kiocb_done(req, ret, issue_flags); return ret; } int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned int cflags = 0; int ret; /* * Multishot MUST be used on a pollable file */ if (!io_file_can_poll(req)) return -EBADFD; ret = __io_read(req, issue_flags); /* * If we get -EAGAIN, recycle our buffer and just let normal poll * handling arm it. */ if (ret == -EAGAIN) { /* * Reset rw->len to 0 again to avoid clamping future mshot * reads, in case the buffer size varies. */ if (io_kbuf_recycle(req, issue_flags)) rw->len = 0; if (issue_flags & IO_URING_F_MULTISHOT) return IOU_ISSUE_SKIP_COMPLETE; return -EAGAIN; } else if (ret <= 0) { io_kbuf_recycle(req, issue_flags); if (ret < 0) req_set_fail(req); } else { /* * Any successful return value will keep the multishot read * armed, if it's still set. Put our buffer and post a CQE. If * we fail to post a CQE, or multishot is no longer set, then * jump to the termination path. This request is then done. */ cflags = io_put_kbuf(req, ret, issue_flags); rw->len = 0; /* similarly to above, reset len to 0 */ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { if (issue_flags & IO_URING_F_MULTISHOT) { /* * Force retry, as we might have more data to * be read and otherwise it won't get retried * until (if ever) another poll is triggered. */ io_poll_multishot_retry(req); return IOU_ISSUE_SKIP_COMPLETE; } return -EAGAIN; } } /* * Either an error, or we've hit overflow posting the CQE. For any * multishot request, hitting overflow will terminate it. */ io_req_set_res(req, ret, cflags); io_req_rw_cleanup(req, issue_flags); if (issue_flags & IO_URING_F_MULTISHOT) return IOU_STOP_MULTISHOT; return IOU_OK; } static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb) { struct inode *inode; bool ret; if (!(req->flags & REQ_F_ISREG)) return true; if (!(kiocb->ki_flags & IOCB_NOWAIT)) { kiocb_start_write(kiocb); return true; } inode = file_inode(kiocb->ki_filp); ret = sb_start_write_trylock(inode->i_sb); if (ret) __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); return ret; } int io_write(struct io_kiocb *req, unsigned int issue_flags) { bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_async_rw *io = req->async_data; struct kiocb *kiocb = &rw->kiocb; ssize_t ret, ret2; loff_t *ppos; ret = io_rw_init_file(req, FMODE_WRITE, WRITE); if (unlikely(ret)) return ret; req->cqe.res = iov_iter_count(&io->iter); if (force_nonblock) { /* If the file doesn't support async, just async punt */ if (unlikely(!io_file_supports_nowait(req, EPOLLOUT))) goto ret_eagain; /* Check if we can support NOWAIT. */ if (!(kiocb->ki_flags & IOCB_DIRECT) && !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) && (req->flags & REQ_F_ISREG)) goto ret_eagain; kiocb->ki_flags |= IOCB_NOWAIT; } else { /* Ensure we clear previously set non-block flag */ kiocb->ki_flags &= ~IOCB_NOWAIT; } ppos = io_kiocb_update_pos(req); ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res); if (unlikely(ret)) return ret; if (unlikely(!io_kiocb_start_write(req, kiocb))) return -EAGAIN; kiocb->ki_flags |= IOCB_WRITE; if (likely(req->file->f_op->write_iter)) ret2 = req->file->f_op->write_iter(kiocb, &io->iter); else if (req->file->f_op->write) ret2 = loop_rw_iter(WRITE, rw, &io->iter); else ret2 = -EINVAL; if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; ret2 = -EAGAIN; } /* * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just * retry them without IOCB_NOWAIT. */ if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) ret2 = -EAGAIN; /* no retry on NONBLOCK nor RWF_NOWAIT */ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) goto done; if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL)) goto ret_eagain; if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) { trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2, req->cqe.res, ret2); /* This is a partial write. The file pos has already been * updated, setup the async struct to complete the request * in the worker. Also update bytes_done to account for * the bytes already written. */ iov_iter_save_state(&io->iter, &io->iter_state); io->bytes_done += ret2; if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); return -EAGAIN; } done: return kiocb_done(req, ret2, issue_flags); } else { ret_eagain: iov_iter_restore(&io->iter, &io->iter_state); if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); return -EAGAIN; } } void io_rw_fail(struct io_kiocb *req) { int res; res = io_fixup_rw_res(req, req->cqe.res); io_req_set_res(req, res, req->cqe.flags); } static int io_uring_classic_poll(struct io_kiocb *req, struct io_comp_batch *iob, unsigned int poll_flags) { struct file *file = req->file; if (req->opcode == IORING_OP_URING_CMD) { struct io_uring_cmd *ioucmd; ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); return file->f_op->uring_cmd_iopoll(ioucmd, iob, poll_flags); } else { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); return file->f_op->iopoll(&rw->kiocb, iob, poll_flags); } } static u64 io_hybrid_iopoll_delay(struct io_ring_ctx *ctx, struct io_kiocb *req) { struct hrtimer_sleeper timer; enum hrtimer_mode mode; ktime_t kt; u64 sleep_time; if (req->flags & REQ_F_IOPOLL_STATE) return 0; if (ctx->hybrid_poll_time == LLONG_MAX) return 0; /* Using half the running time to do schedule */ sleep_time = ctx->hybrid_poll_time / 2; kt = ktime_set(0, sleep_time); req->flags |= REQ_F_IOPOLL_STATE; mode = HRTIMER_MODE_REL; hrtimer_setup_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode); hrtimer_set_expires(&timer.timer, kt); set_current_state(TASK_INTERRUPTIBLE); hrtimer_sleeper_start_expires(&timer, mode); if (timer.task) io_schedule(); hrtimer_cancel(&timer.timer); __set_current_state(TASK_RUNNING); destroy_hrtimer_on_stack(&timer.timer); return sleep_time; } static int io_uring_hybrid_poll(struct io_kiocb *req, struct io_comp_batch *iob, unsigned int poll_flags) { struct io_ring_ctx *ctx = req->ctx; u64 runtime, sleep_time; int ret; sleep_time = io_hybrid_iopoll_delay(ctx, req); ret = io_uring_classic_poll(req, iob, poll_flags); runtime = ktime_get_ns() - req->iopoll_start - sleep_time; /* * Use minimum sleep time if we're polling devices with different * latencies. We could get more completions from the faster ones. */ if (ctx->hybrid_poll_time > runtime) ctx->hybrid_poll_time = runtime; return ret; } int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) { struct io_wq_work_node *pos, *start, *prev; unsigned int poll_flags = 0; DEFINE_IO_COMP_BATCH(iob); int nr_events = 0; /* * Only spin for completions if we don't have multiple devices hanging * off our complete list. */ if (ctx->poll_multi_queue || force_nonspin) poll_flags |= BLK_POLL_ONESHOT; wq_list_for_each(pos, start, &ctx->iopoll_list) { struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); int ret; /* * Move completed and retryable entries to our local lists. * If we find a request that requires polling, break out * and complete those lists first, if we have entries there. */ if (READ_ONCE(req->iopoll_completed)) break; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) ret = io_uring_hybrid_poll(req, &iob, poll_flags); else ret = io_uring_classic_poll(req, &iob, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) poll_flags |= BLK_POLL_ONESHOT; /* iopoll may have completed current req */ if (!rq_list_empty(&iob.req_list) || READ_ONCE(req->iopoll_completed)) break; } if (!rq_list_empty(&iob.req_list)) iob.complete(&iob); else if (!pos) return 0; prev = start; wq_list_for_each_resume(pos, prev) { struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); /* order with io_complete_rw_iopoll(), e.g. ->result updates */ if (!smp_load_acquire(&req->iopoll_completed)) break; nr_events++; req->cqe.flags = io_put_kbuf(req, req->cqe.res, 0); if (req->opcode != IORING_OP_URING_CMD) io_req_rw_cleanup(req, 0); } if (unlikely(!nr_events)) return 0; pos = start ? start->next : ctx->iopoll_list.first; wq_list_cut(&ctx->iopoll_list, prev, start); if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs))) return 0; ctx->submit_state.compl_reqs.first = pos; __io_submit_flush_completions(ctx); return nr_events; } void io_rw_cache_free(const void *entry) { struct io_async_rw *rw = (struct io_async_rw *) entry; if (rw->free_iovec) { kasan_mempool_unpoison_object(rw->free_iovec, rw->free_iov_nr * sizeof(struct iovec)); io_rw_iovec_free(rw); } kfree(rw); }
98 86 14 112 102 8 6 8 8 6 111 98 101 113 89 85 89 85 86 89 2 1 1 1 5 4 2 2 1 2 2 1 1 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 7 1 7 1 14 7 5 2 6 3 1 3 3 18 3 1 5 2 7 9 14 2 12 3 6 4 5 8 12 11 1 5 9 18 9 17 31 1 1 1 1 26 1 1 15 1 2 1 1 13 1 1 6 1 13 2 2 9 26 16 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 /* * Copyright (C) 2014 Red Hat * Copyright (C) 2014 Intel Corp. * Copyright (C) 2018 Intel Corp. * Copyright (c) 2020, The Linux Foundation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> * Daniel Vetter <daniel.vetter@ffwll.ch> */ #include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_drv.h> #include <drm/drm_writeback.h> #include <drm/drm_vblank.h> #include <linux/dma-fence.h> #include <linux/uaccess.h> #include <linux/sync_file.h> #include <linux/file.h> #include "drm_crtc_internal.h" /** * DOC: overview * * This file contains the marshalling and demarshalling glue for the atomic UAPI * in all its forms: The monster ATOMIC IOCTL itself, code for GET_PROPERTY and * SET_PROPERTY IOCTLs. Plus interface functions for compatibility helpers and * drivers which have special needs to construct their own atomic updates, e.g. * for load detect or similar. */ /** * drm_atomic_set_mode_for_crtc - set mode for CRTC * @state: the CRTC whose incoming state to update * @mode: kernel-internal mode to use for the CRTC, or NULL to disable * * Set a mode (originating from the kernel) on the desired CRTC state and update * the enable property. * * RETURNS: * Zero on success, error code on failure. Cannot return -EDEADLK. */ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state, const struct drm_display_mode *mode) { struct drm_crtc *crtc = state->crtc; struct drm_mode_modeinfo umode; /* Early return for no change. */ if (mode && memcmp(&state->mode, mode, sizeof(*mode)) == 0) return 0; drm_property_blob_put(state->mode_blob); state->mode_blob = NULL; if (mode) { struct drm_property_blob *blob; drm_mode_convert_to_umode(&umode, mode); blob = drm_property_create_blob(crtc->dev, sizeof(umode), &umode); if (IS_ERR(blob)) return PTR_ERR(blob); drm_mode_copy(&state->mode, mode); state->mode_blob = blob; state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", mode->name, crtc->base.id, crtc->name, state); } else { memset(&state->mode, 0, sizeof(state->mode)); state->enable = false; drm_dbg_atomic(crtc->dev, "Set [NOMODE] for [CRTC:%d:%s] state %p\n", crtc->base.id, crtc->name, state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_mode_for_crtc); /** * drm_atomic_set_mode_prop_for_crtc - set mode for CRTC * @state: the CRTC whose incoming state to update * @blob: pointer to blob property to use for mode * * Set a mode (originating from a blob property) on the desired CRTC state. * This function will take a reference on the blob property for the CRTC state, * and release the reference held on the state's existing mode property, if any * was set. * * RETURNS: * Zero on success, error code on failure. Cannot return -EDEADLK. */ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state, struct drm_property_blob *blob) { struct drm_crtc *crtc = state->crtc; if (blob == state->mode_blob) return 0; drm_property_blob_put(state->mode_blob); state->mode_blob = NULL; memset(&state->mode, 0, sizeof(state->mode)); if (blob) { int ret; if (blob->length != sizeof(struct drm_mode_modeinfo)) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] bad mode blob length: %zu\n", crtc->base.id, crtc->name, blob->length); return -EINVAL; } ret = drm_mode_convert_umode(crtc->dev, &state->mode, blob->data); if (ret) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] invalid mode (%s, %pe): " DRM_MODE_FMT "\n", crtc->base.id, crtc->name, drm_get_mode_status_name(state->mode.status), ERR_PTR(ret), DRM_MODE_ARG(&state->mode)); return -EINVAL; } state->mode_blob = drm_property_blob_get(blob); state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", state->mode.name, crtc->base.id, crtc->name, state); } else { state->enable = false; drm_dbg_atomic(crtc->dev, "Set [NOMODE] for [CRTC:%d:%s] state %p\n", crtc->base.id, crtc->name, state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_mode_prop_for_crtc); /** * drm_atomic_set_crtc_for_plane - set CRTC for plane * @plane_state: the plane whose incoming state to update * @crtc: CRTC to use for the plane * * Changing the assigned CRTC for a plane requires us to grab the lock and state * for the new CRTC, as needed. This function takes care of all these details * besides updating the pointer in the state object itself. * * Returns: * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK * then the w/w mutex code has detected a deadlock and the entire atomic * sequence must be restarted. All other errors are fatal. */ int drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, struct drm_crtc *crtc) { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; /* Nothing to do for same crtc*/ if (plane_state->crtc == crtc) return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); if (WARN_ON(IS_ERR(crtc_state))) return PTR_ERR(crtc_state); crtc_state->plane_mask &= ~drm_plane_mask(plane); } plane_state->crtc = crtc; if (crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); crtc_state->plane_mask |= drm_plane_mask(plane); } if (crtc) drm_dbg_atomic(plane->dev, "Link [PLANE:%d:%s] state %p to [CRTC:%d:%s]\n", plane->base.id, plane->name, plane_state, crtc->base.id, crtc->name); else drm_dbg_atomic(plane->dev, "Link [PLANE:%d:%s] state %p to [NOCRTC]\n", plane->base.id, plane->name, plane_state); return 0; } EXPORT_SYMBOL(drm_atomic_set_crtc_for_plane); /** * drm_atomic_set_fb_for_plane - set framebuffer for plane * @plane_state: atomic state object for the plane * @fb: fb to use for the plane * * Changing the assigned framebuffer for a plane requires us to grab a reference * to the new fb and drop the reference to the old fb, if there is one. This * function takes care of all these details besides updating the pointer in the * state object itself. */ void drm_atomic_set_fb_for_plane(struct drm_plane_state *plane_state, struct drm_framebuffer *fb) { struct drm_plane *plane = plane_state->plane; if (fb) drm_dbg_atomic(plane->dev, "Set [FB:%d] for [PLANE:%d:%s] state %p\n", fb->base.id, plane->base.id, plane->name, plane_state); else drm_dbg_atomic(plane->dev, "Set [NOFB] for [PLANE:%d:%s] state %p\n", plane->base.id, plane->name, plane_state); drm_framebuffer_assign(&plane_state->fb, fb); } EXPORT_SYMBOL(drm_atomic_set_fb_for_plane); /** * drm_atomic_set_crtc_for_connector - set CRTC for connector * @conn_state: atomic state object for the connector * @crtc: CRTC to use for the connector * * Changing the assigned CRTC for a connector requires us to grab the lock and * state for the new CRTC, as needed. This function takes care of all these * details besides updating the pointer in the state object itself. * * Returns: * 0 on success or can fail with -EDEADLK or -ENOMEM. When the error is EDEADLK * then the w/w mutex code has detected a deadlock and the entire atomic * sequence must be restarted. All other errors are fatal. */ int drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, struct drm_crtc *crtc) { struct drm_connector *connector = conn_state->connector; struct drm_crtc_state *crtc_state; if (conn_state->crtc == crtc) return 0; if (conn_state->crtc) { crtc_state = drm_atomic_get_new_crtc_state(conn_state->state, conn_state->crtc); crtc_state->connector_mask &= ~drm_connector_mask(conn_state->connector); drm_connector_put(conn_state->connector); conn_state->crtc = NULL; } if (crtc) { crtc_state = drm_atomic_get_crtc_state(conn_state->state, crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); crtc_state->connector_mask |= drm_connector_mask(conn_state->connector); drm_connector_get(conn_state->connector); conn_state->crtc = crtc; drm_dbg_atomic(connector->dev, "Link [CONNECTOR:%d:%s] state %p to [CRTC:%d:%s]\n", connector->base.id, connector->name, conn_state, crtc->base.id, crtc->name); } else { drm_dbg_atomic(connector->dev, "Link [CONNECTOR:%d:%s] state %p to [NOCRTC]\n", connector->base.id, connector->name, conn_state); } return 0; } EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector); static void set_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc, s32 __user *fence_ptr) { state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr; } static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc) { s32 __user *fence_ptr; fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr; state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL; return fence_ptr; } static int set_out_fence_for_connector(struct drm_atomic_state *state, struct drm_connector *connector, s32 __user *fence_ptr) { unsigned int index = drm_connector_index(connector); if (!fence_ptr) return 0; if (put_user(-1, fence_ptr)) return -EFAULT; state->connectors[index].out_fence_ptr = fence_ptr; return 0; } static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, struct drm_connector *connector) { unsigned int index = drm_connector_index(connector); s32 __user *fence_ptr; fence_ptr = state->connectors[index].out_fence_ptr; state->connectors[index].out_fence_ptr = NULL; return fence_ptr; } static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_active) state->active = val; else if (property == config->prop_mode_id) { struct drm_property_blob *mode = drm_property_lookup_blob(dev, val); ret = drm_atomic_set_mode_prop_for_crtc(state, mode); drm_property_blob_put(mode); return ret; } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut, val, -1, sizeof(struct drm_color_lut), &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { ret = drm_property_replace_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), -1, &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { ret = drm_property_replace_blob_from_id(dev, &state->gamma_lut, val, -1, sizeof(struct drm_color_lut), &replaced); state->color_mgmt_changed |= replaced; return ret; } else if (property == config->prop_out_fence_ptr) { s32 __user *fence_ptr = u64_to_user_ptr(val); if (!fence_ptr) return 0; if (put_user(-1, fence_ptr)) return -EFAULT; set_out_fence_for_crtc(state->state, crtc, fence_ptr); } else if (property == crtc->scaling_filter_property) { state->scaling_filter = val; } else if (crtc->funcs->atomic_set_property) { return crtc->funcs->atomic_set_property(crtc, state, property, val); } else { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n", crtc->base.id, crtc->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_crtc_get_property(struct drm_crtc *crtc, const struct drm_crtc_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_active) *val = drm_atomic_crtc_effectively_active(state); else if (property == config->prop_mode_id) *val = (state->mode_blob) ? state->mode_blob->base.id : 0; else if (property == config->prop_vrr_enabled) *val = state->vrr_enabled; else if (property == config->degamma_lut_property) *val = (state->degamma_lut) ? state->degamma_lut->base.id : 0; else if (property == config->ctm_property) *val = (state->ctm) ? state->ctm->base.id : 0; else if (property == config->gamma_lut_property) *val = (state->gamma_lut) ? state->gamma_lut->base.id : 0; else if (property == config->prop_out_fence_ptr) *val = 0; else if (property == crtc->scaling_filter_property) *val = state->scaling_filter; else if (crtc->funcs->atomic_get_property) return crtc->funcs->atomic_get_property(crtc, state, property, val); else { drm_dbg_atomic(dev, "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n", crtc->base.id, crtc->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_plane_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_fb_id) { struct drm_framebuffer *fb; fb = drm_framebuffer_lookup(dev, file_priv, val); drm_atomic_set_fb_for_plane(state, fb); if (fb) drm_framebuffer_put(fb); } else if (property == config->prop_in_fence_fd) { if (state->fence) return -EINVAL; if (U642I64(val) == -1) return 0; state->fence = sync_file_get_fence(val); if (!state->fence) return -EINVAL; } else if (property == config->prop_crtc_id) { struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); if (val && !crtc) { drm_dbg_atomic(dev, "[PROP:%d:%s] cannot find CRTC with ID %llu\n", property->base.id, property->name, val); return -EACCES; } return drm_atomic_set_crtc_for_plane(state, crtc); } else if (property == config->prop_crtc_x) { state->crtc_x = U642I64(val); } else if (property == config->prop_crtc_y) { state->crtc_y = U642I64(val); } else if (property == config->prop_crtc_w) { state->crtc_w = val; } else if (property == config->prop_crtc_h) { state->crtc_h = val; } else if (property == config->prop_src_x) { state->src_x = val; } else if (property == config->prop_src_y) { state->src_y = val; } else if (property == config->prop_src_w) { state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { state->pixel_blend_mode = val; } else if (property == plane->rotation_property) { if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] bad rotation bitmask: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->rotation = val; } else if (property == plane->zpos_property) { state->zpos = val; } else if (property == plane->color_encoding_property) { state->color_encoding = val; } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { ret = drm_property_replace_blob_from_id(dev, &state->fb_damage_clips, val, -1, sizeof(struct drm_mode_rect), &replaced); return ret; } else if (property == plane->scaling_filter_property) { state->scaling_filter = val; } else if (plane->funcs->atomic_set_property) { return plane->funcs->atomic_set_property(plane, state, property, val); } else if (property == plane->hotspot_x_property) { if (plane->type != DRM_PLANE_TYPE_CURSOR) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->hotspot_x = val; } else if (property == plane->hotspot_y_property) { if (plane->type != DRM_PLANE_TYPE_CURSOR) { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", plane->base.id, plane->name, val); return -EINVAL; } state->hotspot_y = val; } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", plane->base.id, plane->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_plane_get_property(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_fb_id) { *val = (state->fb) ? state->fb->base.id : 0; } else if (property == config->prop_in_fence_fd) { *val = -1; } else if (property == config->prop_crtc_id) { *val = (state->crtc) ? state->crtc->base.id : 0; } else if (property == config->prop_crtc_x) { *val = I642U64(state->crtc_x); } else if (property == config->prop_crtc_y) { *val = I642U64(state->crtc_y); } else if (property == config->prop_crtc_w) { *val = state->crtc_w; } else if (property == config->prop_crtc_h) { *val = state->crtc_h; } else if (property == config->prop_src_x) { *val = state->src_x; } else if (property == config->prop_src_y) { *val = state->src_y; } else if (property == config->prop_src_w) { *val = state->src_w; } else if (property == config->prop_src_h) { *val = state->src_h; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { *val = state->pixel_blend_mode; } else if (property == plane->rotation_property) { *val = state->rotation; } else if (property == plane->zpos_property) { *val = state->zpos; } else if (property == plane->color_encoding_property) { *val = state->color_encoding; } else if (property == plane->color_range_property) { *val = state->color_range; } else if (property == config->prop_fb_damage_clips) { *val = (state->fb_damage_clips) ? state->fb_damage_clips->base.id : 0; } else if (property == plane->scaling_filter_property) { *val = state->scaling_filter; } else if (plane->funcs->atomic_get_property) { return plane->funcs->atomic_get_property(plane, state, property, val); } else if (property == plane->hotspot_x_property) { *val = state->hotspot_x; } else if (property == plane->hotspot_y_property) { *val = state->hotspot_y; } else { drm_dbg_atomic(dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", plane->base.id, plane->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_set_writeback_fb_for_connector( struct drm_connector_state *conn_state, struct drm_framebuffer *fb) { int ret; struct drm_connector *conn = conn_state->connector; ret = drm_writeback_set_fb(conn_state, fb); if (ret < 0) return ret; if (fb) drm_dbg_atomic(conn->dev, "Set [FB:%d] for connector state %p\n", fb->base.id, conn_state); else drm_dbg_atomic(conn->dev, "Set [NOFB] for connector state %p\n", conn_state); return 0; } static int drm_atomic_connector_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_file *file_priv, struct drm_property *property, uint64_t val) { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; bool replaced = false; int ret; if (property == config->prop_crtc_id) { struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val); if (val && !crtc) { drm_dbg_atomic(dev, "[PROP:%d:%s] cannot find CRTC with ID %llu\n", property->base.id, property->name, val); return -EACCES; } return drm_atomic_set_crtc_for_connector(state, crtc); } else if (property == config->dpms_property) { /* setting DPMS property requires special handling, which * is done in legacy setprop path for us. Disallow (for * now?) atomic writes to DPMS property: */ drm_dbg_atomic(dev, "legacy [PROP:%d:%s] can only be set via legacy uAPI\n", property->base.id, property->name); return -EINVAL; } else if (property == config->tv_select_subconnector_property) { state->tv.select_subconnector = val; } else if (property == config->tv_subconnector_property) { state->tv.subconnector = val; } else if (property == config->tv_left_margin_property) { state->tv.margins.left = val; } else if (property == config->tv_right_margin_property) { state->tv.margins.right = val; } else if (property == config->tv_top_margin_property) { state->tv.margins.top = val; } else if (property == config->tv_bottom_margin_property) { state->tv.margins.bottom = val; } else if (property == config->legacy_tv_mode_property) { state->tv.legacy_mode = val; } else if (property == config->tv_mode_property) { state->tv.mode = val; } else if (property == config->tv_brightness_property) { state->tv.brightness = val; } else if (property == config->tv_contrast_property) { state->tv.contrast = val; } else if (property == config->tv_flicker_reduction_property) { state->tv.flicker_reduction = val; } else if (property == config->tv_overscan_property) { state->tv.overscan = val; } else if (property == config->tv_saturation_property) { state->tv.saturation = val; } else if (property == config->tv_hue_property) { state->tv.hue = val; } else if (property == config->link_status_property) { /* Never downgrade from GOOD to BAD on userspace's request here, * only hw issues can do that. * * For an atomic property the userspace doesn't need to be able * to understand all the properties, but needs to be able to * restore the state it wants on VT switch. So if the userspace * tries to change the link_status from GOOD to BAD, driver * silently rejects it and returns a 0. This prevents userspace * from accidentally breaking the display when it restores the * state. */ if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; } else if (property == config->hdr_output_metadata_property) { ret = drm_property_replace_blob_from_id(dev, &state->hdr_output_metadata, val, sizeof(struct hdr_output_metadata), -1, &replaced); return ret; } else if (property == config->aspect_ratio_property) { state->picture_aspect_ratio = val; } else if (property == config->content_type_property) { state->content_type = val; } else if (property == connector->scaling_mode_property) { state->scaling_mode = val; } else if (property == config->content_protection_property) { if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) { drm_dbg_kms(dev, "only drivers can set CP Enabled\n"); return -EINVAL; } state->content_protection = val; } else if (property == config->hdcp_content_type_property) { state->hdcp_content_type = val; } else if (property == connector->colorspace_property) { state->colorspace = val; } else if (property == config->writeback_fb_id_property) { struct drm_framebuffer *fb; int ret; fb = drm_framebuffer_lookup(dev, file_priv, val); ret = drm_atomic_set_writeback_fb_for_connector(state, fb); if (fb) drm_framebuffer_put(fb); return ret; } else if (property == config->writeback_out_fence_ptr_property) { s32 __user *fence_ptr = u64_to_user_ptr(val); return set_out_fence_for_connector(state->state, connector, fence_ptr); } else if (property == connector->max_bpc_property) { state->max_requested_bpc = val; } else if (property == connector->privacy_screen_sw_state_property) { state->privacy_screen_sw_state = val; } else if (property == connector->broadcast_rgb_property) { state->hdmi.broadcast_rgb = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); } else { drm_dbg_atomic(connector->dev, "[CONNECTOR:%d:%s] unknown property [PROP:%d:%s]\n", connector->base.id, connector->name, property->base.id, property->name); return -EINVAL; } return 0; } static int drm_atomic_connector_get_property(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, uint64_t *val) { struct drm_device *dev = connector->dev; struct drm_mode_config *config = &dev->mode_config; if (property == config->prop_crtc_id) { *val = (state->crtc) ? state->crtc->base.id : 0; } else if (property == config->dpms_property) { if (state->crtc && state->crtc->state->self_refresh_active) *val = DRM_MODE_DPMS_ON; else *val = connector->dpms; } else if (property == config->tv_select_subconnector_property) { *val = state->tv.select_subconnector; } else if (property == config->tv_subconnector_property) { *val = state->tv.subconnector; } else if (property == config->tv_left_margin_property) { *val = state->tv.margins.left; } else if (property == config->tv_right_margin_property) { *val = state->tv.margins.right; } else if (property == config->tv_top_margin_property) { *val = state->tv.margins.top; } else if (property == config->tv_bottom_margin_property) { *val = state->tv.margins.bottom; } else if (property == config->legacy_tv_mode_property) { *val = state->tv.legacy_mode; } else if (property == config->tv_mode_property) { *val = state->tv.mode; } else if (property == config->tv_brightness_property) { *val = state->tv.brightness; } else if (property == config->tv_contrast_property) { *val = state->tv.contrast; } else if (property == config->tv_flicker_reduction_property) { *val = state->tv.flicker_reduction; } else if (property == config->tv_overscan_property) { *val = state->tv.overscan; } else if (property == config->tv_saturation_property) { *val = state->tv.saturation; } else if (property == config->tv_hue_property) { *val = state->tv.hue; } else if (property == config->link_status_property) { *val = state->link_status; } else if (property == config->aspect_ratio_property) { *val = state->picture_aspect_ratio; } else if (property == config->content_type_property) { *val = state->content_type; } else if (property == connector->colorspace_property) { *val = state->colorspace; } else if (property == connector->scaling_mode_property) { *val = state->scaling_mode; } else if (property == config->hdr_output_metadata_property) { *val = state->hdr_output_metadata ? state->hdr_output_metadata->base.id : 0; } else if (property == config->content_protection_property) { *val = state->content_protection; } else if (property == config->hdcp_content_type_property) { *val = state->hdcp_content_type; } else if (property == config->writeback_fb_id_property) { /* Writeback framebuffer is one-shot, write and forget */ *val = 0; } else if (property == config->writeback_out_fence_ptr_property) { *val = 0; } else if (property == connector->max_bpc_property) { *val = state->max_requested_bpc; } else if (property == connector->privacy_screen_sw_state_property) { *val = state->privacy_screen_sw_state; } else if (property == connector->broadcast_rgb_property) { *val = state->hdmi.broadcast_rgb; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); } else { drm_dbg_atomic(dev, "[CONNECTOR:%d:%s] unknown property [PROP:%d:%s]\n", connector->base.id, connector->name, property->base.id, property->name); return -EINVAL; } return 0; } int drm_atomic_get_property(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { struct drm_device *dev = property->dev; int ret; switch (obj->type) { case DRM_MODE_OBJECT_CONNECTOR: { struct drm_connector *connector = obj_to_connector(obj); WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); ret = drm_atomic_connector_get_property(connector, connector->state, property, val); break; } case DRM_MODE_OBJECT_CRTC: { struct drm_crtc *crtc = obj_to_crtc(obj); WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); ret = drm_atomic_crtc_get_property(crtc, crtc->state, property, val); break; } case DRM_MODE_OBJECT_PLANE: { struct drm_plane *plane = obj_to_plane(obj); WARN_ON(!drm_modeset_is_locked(&plane->mutex)); ret = drm_atomic_plane_get_property(plane, plane->state, property, val); break; } default: drm_dbg_atomic(dev, "[OBJECT:%d] has no properties\n", obj->id); ret = -EINVAL; break; } return ret; } /* * The big monster ioctl */ static struct drm_pending_vblank_event *create_vblank_event( struct drm_crtc *crtc, uint64_t user_data) { struct drm_pending_vblank_event *e = NULL; e = kzalloc(sizeof *e, GFP_KERNEL); if (!e) return NULL; e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.crtc_id = crtc->base.id; e->event.vbl.user_data = user_data; return e; } int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, struct drm_connector *connector, int mode) { struct drm_connector *tmp_connector; struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i, ret, old_mode = connector->dpms; bool active = false; ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex, state->acquire_ctx); if (ret) return ret; if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; connector->dpms = mode; crtc = connector->state->crtc; if (!crtc) goto out; ret = drm_atomic_add_affected_connectors(state, crtc); if (ret) goto out; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto out; } for_each_new_connector_in_state(state, tmp_connector, new_conn_state, i) { if (new_conn_state->crtc != crtc) continue; if (tmp_connector->dpms == DRM_MODE_DPMS_ON) { active = true; break; } } crtc_state->active = active; ret = drm_atomic_commit(state); out: if (ret != 0) connector->dpms = old_mode; return ret; } static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value, struct drm_property *prop) { if (ret != 0 || old_val != prop_value) { drm_dbg_atomic(prop->dev, "[PROP:%d:%s] No prop can be changed during async flip\n", prop->base.id, prop->name); return -EINVAL; } return 0; } int drm_atomic_set_property(struct drm_atomic_state *state, struct drm_file *file_priv, struct drm_mode_object *obj, struct drm_property *prop, u64 prop_value, bool async_flip) { struct drm_mode_object *ref; u64 old_val; int ret; if (!drm_property_change_valid_get(prop, prop_value, &ref)) return -EINVAL; switch (obj->type) { case DRM_MODE_OBJECT_CONNECTOR: { struct drm_connector *connector = obj_to_connector(obj); struct drm_connector_state *connector_state; connector_state = drm_atomic_get_connector_state(state, connector); if (IS_ERR(connector_state)) { ret = PTR_ERR(connector_state); break; } if (async_flip) { ret = drm_atomic_connector_get_property(connector, connector_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } ret = drm_atomic_connector_set_property(connector, connector_state, file_priv, prop, prop_value); break; } case DRM_MODE_OBJECT_CRTC: { struct drm_crtc *crtc = obj_to_crtc(obj); struct drm_crtc_state *crtc_state; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); break; } if (async_flip) { ret = drm_atomic_crtc_get_property(crtc, crtc_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } ret = drm_atomic_crtc_set_property(crtc, crtc_state, prop, prop_value); break; } case DRM_MODE_OBJECT_PLANE: { struct drm_plane *plane = obj_to_plane(obj); struct drm_plane_state *plane_state; struct drm_mode_config *config = &plane->dev->mode_config; plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); break; } if (async_flip && (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || (prop != config->prop_fb_id && prop != config->prop_in_fence_fd && prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); break; } default: drm_dbg_atomic(prop->dev, "[OBJECT:%d] has no properties\n", obj->id); ret = -EINVAL; break; } drm_property_change_valid_put(prop, ref); return ret; } /** * DOC: explicit fencing properties * * Explicit fencing allows userspace to control the buffer synchronization * between devices. A Fence or a group of fences are transferred to/from * userspace using Sync File fds and there are two DRM properties for that. * IN_FENCE_FD on each DRM Plane to send fences to the kernel and * OUT_FENCE_PTR on each DRM CRTC to receive fences from the kernel. * * As a contrast, with implicit fencing the kernel keeps track of any * ongoing rendering, and automatically ensures that the atomic update waits * for any pending rendering to complete. This is usually tracked in &struct * dma_resv which can also contain mandatory kernel fences. Implicit syncing * is how Linux traditionally worked (e.g. DRI2/3 on X.org), whereas explicit * fencing is what Android wants. * * "IN_FENCE_FD”: * Use this property to pass a fence that DRM should wait on before * proceeding with the Atomic Commit request and show the framebuffer for * the plane on the screen. The fence can be either a normal fence or a * merged one, the sync_file framework will handle both cases and use a * fence_array if a merged fence is received. Passing -1 here means no * fences to wait on. * * If the Atomic Commit request has the DRM_MODE_ATOMIC_TEST_ONLY flag * it will only check if the Sync File is a valid one. * * On the driver side the fence is stored on the @fence parameter of * &struct drm_plane_state. Drivers which also support implicit fencing * should extract the implicit fence using drm_gem_plane_helper_prepare_fb(), * to make sure there's consistent behaviour between drivers in precedence * of implicit vs. explicit fencing. * * "OUT_FENCE_PTR”: * Use this property to pass a file descriptor pointer to DRM. Once the * Atomic Commit request call returns OUT_FENCE_PTR will be filled with * the file descriptor number of a Sync File. This Sync File contains the * CRTC fence that will be signaled when all framebuffers present on the * Atomic Commit * request for that given CRTC are scanned out on the * screen. * * The Atomic Commit request fails if a invalid pointer is passed. If the * Atomic Commit request fails for any other reason the out fence fd * returned will be -1. On a Atomic Commit with the * DRM_MODE_ATOMIC_TEST_ONLY flag the out fence will also be set to -1. * * Note that out-fences don't have a special interface to drivers and are * internally represented by a &struct drm_pending_vblank_event in struct * &drm_crtc_state, which is also used by the nonblocking atomic commit * helpers and for the DRM event handling for existing userspace. */ struct drm_out_fence_state { s32 __user *out_fence_ptr; struct sync_file *sync_file; int fd; }; static int setup_out_fence(struct drm_out_fence_state *fence_state, struct dma_fence *fence) { fence_state->fd = get_unused_fd_flags(O_CLOEXEC); if (fence_state->fd < 0) return fence_state->fd; if (put_user(fence_state->fd, fence_state->out_fence_ptr)) return -EFAULT; fence_state->sync_file = sync_file_create(fence); if (!fence_state->sync_file) return -ENOMEM; return 0; } static int prepare_signaling(struct drm_device *dev, struct drm_atomic_state *state, struct drm_mode_atomic *arg, struct drm_file *file_priv, struct drm_out_fence_state **fence_state, unsigned int *num_fences) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; struct drm_connector *conn; struct drm_connector_state *conn_state; int i, c = 0, ret; if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) return 0; for_each_new_crtc_in_state(state, crtc, crtc_state, i) { s32 __user *fence_ptr; fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc); if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT || fence_ptr) { struct drm_pending_vblank_event *e; e = create_vblank_event(crtc, arg->user_data); if (!e) return -ENOMEM; crtc_state->event = e; } if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { struct drm_pending_vblank_event *e = crtc_state->event; if (!file_priv) continue; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); crtc_state->event = NULL; return ret; } } if (fence_ptr) { struct dma_fence *fence; struct drm_out_fence_state *f; f = krealloc(*fence_state, sizeof(**fence_state) * (*num_fences + 1), GFP_KERNEL); if (!f) return -ENOMEM; memset(&f[*num_fences], 0, sizeof(*f)); f[*num_fences].out_fence_ptr = fence_ptr; *fence_state = f; fence = drm_crtc_create_fence(crtc); if (!fence) return -ENOMEM; ret = setup_out_fence(&f[(*num_fences)++], fence); if (ret) { dma_fence_put(fence); return ret; } crtc_state->event->base.fence = fence; } c++; } for_each_new_connector_in_state(state, conn, conn_state, i) { struct drm_writeback_connector *wb_conn; struct drm_out_fence_state *f; struct dma_fence *fence; s32 __user *fence_ptr; if (!conn_state->writeback_job) continue; fence_ptr = get_out_fence_for_connector(state, conn); if (!fence_ptr) continue; f = krealloc(*fence_state, sizeof(**fence_state) * (*num_fences + 1), GFP_KERNEL); if (!f) return -ENOMEM; memset(&f[*num_fences], 0, sizeof(*f)); f[*num_fences].out_fence_ptr = fence_ptr; *fence_state = f; wb_conn = drm_connector_to_writeback(conn); fence = drm_writeback_get_out_fence(wb_conn); if (!fence) return -ENOMEM; ret = setup_out_fence(&f[(*num_fences)++], fence); if (ret) { dma_fence_put(fence); return ret; } conn_state->writeback_job->out_fence = fence; } /* * Having this flag means user mode pends on event which will never * reach due to lack of at least one CRTC for signaling */ if (c == 0 && (arg->flags & DRM_MODE_PAGE_FLIP_EVENT)) { drm_dbg_atomic(dev, "need at least one CRTC for DRM_MODE_PAGE_FLIP_EVENT"); return -EINVAL; } return 0; } static void complete_signaling(struct drm_device *dev, struct drm_atomic_state *state, struct drm_out_fence_state *fence_state, unsigned int num_fences, bool install_fds) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i; if (install_fds) { for (i = 0; i < num_fences; i++) fd_install(fence_state[i].fd, fence_state[i].sync_file->file); kfree(fence_state); return; } for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct drm_pending_vblank_event *event = crtc_state->event; /* * Free the allocated event. drm_atomic_helper_setup_commit * can allocate an event too, so only free it if it's ours * to prevent a double free in drm_atomic_state_clear. */ if (event && (event->base.fence || event->base.file_priv)) { drm_event_cancel_free(dev, &event->base); crtc_state->event = NULL; } } if (!fence_state) return; for (i = 0; i < num_fences; i++) { if (fence_state[i].sync_file) fput(fence_state[i].sync_file->file); if (fence_state[i].fd >= 0) put_unused_fd(fence_state[i].fd); /* If this fails log error to the user */ if (fence_state[i].out_fence_ptr && put_user(-1, fence_state[i].out_fence_ptr)) drm_dbg_atomic(dev, "Couldn't clear out_fence_ptr\n"); } kfree(fence_state); } static void set_async_flip(struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; int i; for_each_new_crtc_in_state(state, crtc, crtc_state, i) { crtc_state->async_flip = true; } } int drm_mode_atomic_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_atomic *arg = data; uint32_t __user *objs_ptr = (uint32_t __user *)(unsigned long)(arg->objs_ptr); uint32_t __user *count_props_ptr = (uint32_t __user *)(unsigned long)(arg->count_props_ptr); uint32_t __user *props_ptr = (uint32_t __user *)(unsigned long)(arg->props_ptr); uint64_t __user *prop_values_ptr = (uint64_t __user *)(unsigned long)(arg->prop_values_ptr); unsigned int copied_objs, copied_props; struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; struct drm_out_fence_state *fence_state; int ret = 0; unsigned int i, j, num_fences; bool async_flip = false; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) return -EOPNOTSUPP; /* disallow for userspace that has not enabled atomic cap (even * though this may be a bit overkill, since legacy userspace * wouldn't know how to call this ioctl) */ if (!file_priv->atomic) { drm_dbg_atomic(dev, "commit failed: atomic cap not enabled\n"); return -EINVAL; } if (arg->flags & ~DRM_MODE_ATOMIC_FLAGS) { drm_dbg_atomic(dev, "commit failed: invalid flag\n"); return -EINVAL; } if (arg->reserved) { drm_dbg_atomic(dev, "commit failed: reserved field set\n"); return -EINVAL; } if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) { if (!dev->mode_config.async_page_flip) { drm_dbg_atomic(dev, "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n"); return -EINVAL; } async_flip = true; } /* can't test and expect an event at the same time. */ if ((arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) && (arg->flags & DRM_MODE_PAGE_FLIP_EVENT)) { drm_dbg_atomic(dev, "commit failed: page-flip event requested with test-only commit\n"); return -EINVAL; } state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); state->acquire_ctx = &ctx; state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET); retry: copied_objs = 0; copied_props = 0; fence_state = NULL; num_fences = 0; for (i = 0; i < arg->count_objs; i++) { uint32_t obj_id, count_props; struct drm_mode_object *obj; if (get_user(obj_id, objs_ptr + copied_objs)) { ret = -EFAULT; goto out; } obj = drm_mode_object_find(dev, file_priv, obj_id, DRM_MODE_OBJECT_ANY); if (!obj) { drm_dbg_atomic(dev, "cannot find object ID %d", obj_id); ret = -ENOENT; goto out; } if (!obj->properties) { drm_dbg_atomic(dev, "[OBJECT:%d] has no properties", obj_id); drm_mode_object_put(obj); ret = -ENOENT; goto out; } if (get_user(count_props, count_props_ptr + copied_objs)) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } copied_objs++; for (j = 0; j < count_props; j++) { uint32_t prop_id; uint64_t prop_value; struct drm_property *prop; if (get_user(prop_id, props_ptr + copied_props)) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } prop = drm_mode_obj_find_prop_id(obj, prop_id); if (!prop) { drm_dbg_atomic(dev, "[OBJECT:%d] cannot find property ID %d", obj_id, prop_id); drm_mode_object_put(obj); ret = -ENOENT; goto out; } if (copy_from_user(&prop_value, prop_values_ptr + copied_props, sizeof(prop_value))) { drm_mode_object_put(obj); ret = -EFAULT; goto out; } ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, async_flip); if (ret) { drm_mode_object_put(obj); goto out; } copied_props++; } drm_mode_object_put(obj); } ret = prepare_signaling(dev, state, arg, file_priv, &fence_state, &num_fences); if (ret) goto out; if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) set_async_flip(state); if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) { ret = drm_atomic_check_only(state); } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { ret = drm_atomic_nonblocking_commit(state); } else { ret = drm_atomic_commit(state); } out: complete_signaling(dev, state, fence_state, num_fences, !ret); if (ret == -EDEADLK) { drm_atomic_state_clear(state); ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_atomic_state_put(state); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; }
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 /* SPDX-License-Identifier: GPL-2.0 */ /* include/net/dsfield.h - Manipulation of the Differentiated Services field */ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ #ifndef __NET_DSFIELD_H #define __NET_DSFIELD_H #include <linux/types.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <asm/byteorder.h> static inline __u8 ipv4_get_dsfield(const struct iphdr *iph) { return iph->tos; } static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h) { return ntohs(*(__force const __be16 *)ipv6h) >> 4; } static inline void ipv4_change_dsfield(struct iphdr *iph,__u8 mask, __u8 value) { __u32 check = ntohs((__force __be16)iph->check); __u8 dsfield; dsfield = (iph->tos & mask) | value; check += iph->tos; if ((check+1) >> 16) check = (check+1) & 0xffff; check -= dsfield; check += check >> 16; /* adjust carry */ iph->check = (__force __sum16)htons(check); iph->tos = dsfield; } static inline void ipv6_change_dsfield(struct ipv6hdr *ipv6h,__u8 mask, __u8 value) { __be16 *p = (__force __be16 *)ipv6h; *p = (*p & htons((((u16)mask << 4) | 0xf00f))) | htons((u16)value << 4); } #endif
4 3 1 12 4 1 1 1 1 15 15 15 15 1311 621 1304 3 3 268 269 1546 1547 1 1545 8 1548 3 1117 1 1119 1050 1050 68 68 55 1525 4 1528 12 12 623 1054 1055 623 1079 75 1058 1 1 1 5 1 1 3 1 428 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 // SPDX-License-Identifier: GPL-2.0 /* * inode.c - part of debugfs, a tiny little debug file system * * Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. * Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org> * * debugfs is for people to use instead of /proc or /sys. * See ./Documentation/core-api/kernel-api.rst for more details. */ #define pr_fmt(fmt) "debugfs: " fmt #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/namei.h> #include <linux/debugfs.h> #include <linux/fsnotify.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/security.h> #include "internal.h" #define DEBUGFS_DEFAULT_MODE 0700 static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS; /* * Don't allow access attributes to be changed whilst the kernel is locked down * so that we can use the file mode as part of a heuristic to determine whether * to lock down individual files. */ static int debugfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *ia) { int ret; if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) { ret = security_locked_down(LOCKDOWN_DEBUGFS); if (ret) return ret; } return simple_setattr(&nop_mnt_idmap, dentry, ia); } static const struct inode_operations debugfs_file_inode_operations = { .setattr = debugfs_setattr, }; static const struct inode_operations debugfs_dir_inode_operations = { .lookup = simple_lookup, .setattr = debugfs_setattr, }; static const struct inode_operations debugfs_symlink_inode_operations = { .get_link = simple_get_link, .setattr = debugfs_setattr, }; static struct inode *debugfs_get_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); } return inode; } struct debugfs_fs_info { kuid_t uid; kgid_t gid; umode_t mode; /* Opt_* bitfield. */ unsigned int opts; }; enum { Opt_uid, Opt_gid, Opt_mode, Opt_source, }; static const struct fs_parameter_spec debugfs_param_specs[] = { fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("mode", Opt_mode), fsparam_uid ("uid", Opt_uid), fsparam_string ("source", Opt_source), {} }; static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct debugfs_fs_info *opts = fc->s_fs_info; struct fs_parse_result result; int opt; opt = fs_parse(fc, debugfs_param_specs, param, &result); if (opt < 0) { /* * We might like to report bad mount options here; but * traditionally debugfs has ignored all mount options */ if (opt == -ENOPARAM) return 0; return opt; } switch (opt) { case Opt_uid: opts->uid = result.uid; break; case Opt_gid: opts->gid = result.gid; break; case Opt_mode: opts->mode = result.uint_32 & S_IALLUGO; break; case Opt_source: if (fc->source) return invalfc(fc, "Multiple sources specified"); fc->source = param->string; param->string = NULL; break; /* * We might like to report bad mount options here; * but traditionally debugfs has ignored all mount options */ } opts->opts |= BIT(opt); return 0; } static void _debugfs_apply_options(struct super_block *sb, bool remount) { struct debugfs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); /* * On remount, only reset mode/uid/gid if they were provided as mount * options. */ if (!remount || fsi->opts & BIT(Opt_mode)) { inode->i_mode &= ~S_IALLUGO; inode->i_mode |= fsi->mode; } if (!remount || fsi->opts & BIT(Opt_uid)) inode->i_uid = fsi->uid; if (!remount || fsi->opts & BIT(Opt_gid)) inode->i_gid = fsi->gid; } static void debugfs_apply_options(struct super_block *sb) { _debugfs_apply_options(sb, false); } static void debugfs_apply_options_remount(struct super_block *sb) { _debugfs_apply_options(sb, true); } static int debugfs_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct debugfs_fs_info *sb_opts = sb->s_fs_info; struct debugfs_fs_info *new_opts = fc->s_fs_info; sync_filesystem(sb); /* structure copy of new mount options to sb */ *sb_opts = *new_opts; debugfs_apply_options_remount(sb); return 0; } static int debugfs_show_options(struct seq_file *m, struct dentry *root) { struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, fsi->uid)); if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, fsi->gid)); if (fsi->mode != DEBUGFS_DEFAULT_MODE) seq_printf(m, ",mode=%o", fsi->mode); return 0; } static void debugfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); free_inode_nonrcu(inode); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .show_options = debugfs_show_options, .free_inode = debugfs_free_inode, }; static void debugfs_release_dentry(struct dentry *dentry) { struct debugfs_fsdata *fsd = dentry->d_fsdata; if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ if (fsd && fsd->real_fops) { WARN_ON(!list_empty(&fsd->cancellations)); mutex_destroy(&fsd->cancellations_mtx); } kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { struct debugfs_fsdata *fsd = path->dentry->d_fsdata; return fsd->automount(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { .d_delete = always_delete_dentry, .d_release = debugfs_release_dentry, .d_automount = debugfs_automount, }; static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr debug_files[] = {{""}}; int err; err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); if (err) return err; sb->s_op = &debugfs_super_operations; sb->s_d_op = &debugfs_dops; debugfs_apply_options(sb); return 0; } static int debugfs_get_tree(struct fs_context *fc) { if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return -EPERM; return get_tree_single(fc, debugfs_fill_super); } static void debugfs_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations debugfs_context_ops = { .free = debugfs_free_fc, .parse_param = debugfs_parse_param, .get_tree = debugfs_get_tree, .reconfigure = debugfs_reconfigure, }; static int debugfs_init_fs_context(struct fs_context *fc) { struct debugfs_fs_info *fsi; fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); if (!fsi) return -ENOMEM; fsi->mode = DEBUGFS_DEFAULT_MODE; fc->s_fs_info = fsi; fc->ops = &debugfs_context_ops; return 0; } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", .init_fs_context = debugfs_init_fs_context, .parameters = debugfs_param_specs, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("debugfs"); /** * debugfs_lookup() - look up an existing debugfs file * @name: a pointer to a string containing the name of the file to look up. * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file * doesn't exist or an error occurs, %NULL will be returned. The returned * dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) { struct dentry *dentry; if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent)) return NULL; if (!parent) parent = debugfs_mount->mnt_root; dentry = lookup_positive_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) return NULL; return dentry; } EXPORT_SYMBOL_GPL(debugfs_lookup); static struct dentry *start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; int error; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return ERR_PTR(-EPERM); if (!debugfs_initialized()) return ERR_PTR(-ENOENT); pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) return parent; error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) { pr_err("Unable to pin filesystem for file '%s'\n", name); return ERR_PTR(error); } /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super * block. A pointer to that is in the struct vfsmount that we * have around. */ if (!parent) parent = debugfs_mount->mnt_root; inode_lock(d_inode(parent)); if (unlikely(IS_DEADDIR(d_inode(parent)))) dentry = ERR_PTR(-ENOENT); else dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { if (d_is_dir(dentry)) pr_err("Directory '%s' with parent '%s' already present!\n", name, parent->d_name.name); else pr_err("File '%s' in directory '%s' already present!\n", name, parent->d_name.name); dput(dentry); dentry = ERR_PTR(-EEXIST); } if (IS_ERR(dentry)) { inode_unlock(d_inode(parent)); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } return dentry; } static struct dentry *failed_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); return dentry; } static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *proxy_fops, const void *real_fops) { struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) mode |= S_IFREG; BUG_ON(!S_ISREG(mode)); dentry = start_creating(name, parent); if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", name); return failed_creating(dentry); } inode->i_mode = mode; inode->i_private = data; inode->i_op = &debugfs_file_inode_operations; inode->i_fop = proxy_fops; dentry->d_fsdata = (void *)((unsigned long)real_fops | DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } struct dentry *debugfs_create_file_full(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { if (WARN_ON((unsigned long)fops & (DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT | DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return ERR_PTR(-EINVAL); return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_full_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_full); struct dentry *debugfs_create_file_short(const char *name, umode_t mode, struct dentry *parent, void *data, const struct debugfs_short_fops *fops) { if (WARN_ON((unsigned long)fops & (DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT | DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return ERR_PTR(-EINVAL); return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_full_proxy_file_operations : &debugfs_noop_file_operations, (const void *)((unsigned long)fops | DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT)); } EXPORT_SYMBOL_GPL(debugfs_create_file_short); /** * debugfs_create_file_unsafe - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * debugfs_create_file_unsafe() is completely analogous to * debugfs_create_file(), the only difference being that the fops * handed it will not get protected against file removals by the * debugfs core. * * It is your responsibility to protect your struct file_operation * methods against file removals by means of debugfs_file_get() * and debugfs_file_put(). ->open() is still protected by * debugfs though. * * Any struct file_operations defined by means of * DEFINE_DEBUGFS_ATTRIBUTE() is protected against file removals and * thus, may be used here. */ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_open_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); /** * debugfs_create_file_size - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * @file_size: initial file size * * This is the basic "create a file" function for debugfs. It allows for a * wide range of flexibility in creating a file, or a directory (if you want * to create a directory, the debugfs_create_dir() function is * recommended to be used instead.) */ void debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops, loff_t file_size) { struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); if (!IS_ERR(de)) d_inode(de)->i_size = file_size; } EXPORT_SYMBOL_GPL(debugfs_create_file_size); /** * debugfs_create_dir - create a directory in the debugfs filesystem * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * directory will be created in the root of the debugfs filesystem. * * This function creates a directory in debugfs with the given name. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { struct dentry *dentry = start_creating(name, parent); struct inode *inode; if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", name); return failed_creating(dentry); } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &debugfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_dir); /** * debugfs_create_automount - create automount point in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @f: function to be called when pathname resolution steps on that one. * @data: opaque argument to pass to f(). * * @f should return what ->d_automount() would. */ struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, void *data) { struct dentry *dentry = start_creating(name, parent); struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); if (!fsd) { failed_creating(dentry); return ERR_PTR(-ENOMEM); } fsd->automount = f; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); kfree(fsd); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = fsd; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL(debugfs_create_automount); /** * debugfs_create_symlink- create a symbolic link in the debugfs filesystem * @name: a pointer to a string containing the name of the symbolic link to * create. * @parent: a pointer to the parent dentry for this symbolic link. This * should be a directory dentry if set. If this parameter is NULL, * then the symbolic link will be created in the root of the debugfs * filesystem. * @target: a pointer to a string containing the path to the target of the * symbolic link. * * This function creates a symbolic link with the given name in debugfs that * links to the given target path. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is * unloaded, you are responsible here.) If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *target) { struct dentry *dentry; struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) return ERR_PTR(-ENOMEM); dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); return dentry; } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create symlink '%s'\n", name); kfree(link); return failed_creating(dentry); } inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; d_instantiate(dentry, inode); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either * debugfs_file_get() must see a dead dentry or we must see a * debugfs_fsdata instance at ->d_fsdata here (or both). */ smp_mb(); fsd = READ_ONCE(dentry->d_fsdata); if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* if this was the last reference, we're done */ if (refcount_dec_and_test(&fsd->active_users)) return; /* * If there's still a reference, the code that obtained it can * be in different states: * - The common case of not using cancellations, or already * after debugfs_leave_cancellation(), where we just need * to wait for debugfs_file_put() which signals the completion; * - inside a cancellation section, i.e. between * debugfs_enter_cancellation() and debugfs_leave_cancellation(), * in which case we need to trigger the ->cancel() function, * and then wait for debugfs_file_put() just like in the * previous case; * - before debugfs_enter_cancellation() (but obviously after * debugfs_file_get()), in which case we may not see the * cancellation in the list on the first round of the loop, * but debugfs_enter_cancellation() signals the completion * after adding it, so this code gets woken up to call the * ->cancel() function. */ while (refcount_read(&fsd->active_users)) { struct debugfs_cancellation *c; /* * Lock the cancellations. Note that the cancellations * structs are meant to be on the stack, so we need to * ensure we either use them here or don't touch them, * and debugfs_leave_cancellation() will wait for this * to be finished processing before exiting one. It may * of course win and remove the cancellation, but then * chances are we never even got into this bit, we only * do if the refcount isn't zero already. */ mutex_lock(&fsd->cancellations_mtx); while ((c = list_first_entry_or_null(&fsd->cancellations, typeof(*c), list))) { list_del_init(&c->list); c->cancel(dentry, c->cancel_data); } mutex_unlock(&fsd->cancellations_mtx); wait_for_completion(&fsd->active_users_drained); } } static void remove_one(struct dentry *victim) { if (d_is_reg(victim)) __debugfs_file_removed(victim); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } /** * debugfs_remove - recursively removes a directory * @dentry: a pointer to a the dentry of the directory to be removed. If this * parameter is NULL or an error value, nothing will be done. * * This function recursively removes a directory tree in debugfs that * was previously created with a call to another debugfs function * (like debugfs_create_file() or variants thereof.) * * This function is required to be called in order for the file to be * removed, no automatic cleanup of files will happen when a module is * removed, you are responsible here. */ void debugfs_remove(struct dentry *dentry) { if (IS_ERR_OR_NULL(dentry)) return; simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); simple_recursive_removal(dentry, remove_one); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove); /** * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it * @name: a pointer to a string containing the name of the item to look up. * @parent: a pointer to the parent dentry of the item. * * This is the equlivant of doing something like * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting * handled for the directory being looked up. */ void debugfs_lookup_and_remove(const char *name, struct dentry *parent) { struct dentry *dentry; dentry = debugfs_lookup(name, parent); if (!dentry) return; debugfs_remove(dentry); dput(dentry); } EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); /** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This * should be a directory dentry. * @old_dentry: dentry of an object to be renamed. * @new_dir: a pointer to the parent dentry where the object should be * moved. This should be a directory dentry. * @new_name: a pointer to a string containing the target name. * * This function renames a file/directory in debugfs. The target must not * exist for rename to succeed. * * This function will return a pointer to old_dentry (which is updated to * reflect renaming) if it succeeds. If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name) { int error; struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; if (IS_ERR(old_dir)) return old_dir; if (IS_ERR(new_dir)) return new_dir; if (IS_ERR_OR_NULL(old_dentry)) return old_dentry; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) goto exit; /* Source does not exist, cyclic rename, or mountpoint? */ if (d_really_is_negative(old_dentry) || old_dentry == trap || d_mountpoint(old_dentry)) goto exit; dentry = lookup_one_len(new_name, new_dir, strlen(new_name)); /* Lookup failed, cyclic rename or target exists? */ if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) goto exit; take_dentry_name_snapshot(&old_name, old_dentry); error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry, d_inode(new_dir), dentry, 0); if (error) { release_dentry_name_snapshot(&old_name); goto exit; } d_move(old_dentry, dentry); fsnotify_move(d_inode(old_dir), d_inode(new_dir), &old_name.name, d_is_dir(old_dentry), NULL, old_dentry); release_dentry_name_snapshot(&old_name); unlock_rename(new_dir, old_dir); dput(dentry); return old_dentry; exit: if (dentry && !IS_ERR(dentry)) dput(dentry); unlock_rename(new_dir, old_dir); if (IS_ERR(dentry)) return dentry; return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(debugfs_rename); /** * debugfs_initialized - Tells whether debugfs has been registered */ bool debugfs_initialized(void) { return debugfs_registered; } EXPORT_SYMBOL_GPL(debugfs_initialized); static int __init debugfs_kernel(char *str) { if (str) { if (!strcmp(str, "on")) debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT; else if (!strcmp(str, "no-mount")) debugfs_allow = DEBUGFS_ALLOW_API; else if (!strcmp(str, "off")) debugfs_allow = 0; } return 0; } early_param("debugfs", debugfs_kernel); static int __init debugfs_init(void) { int retval; if (!(debugfs_allow & DEBUGFS_ALLOW_MOUNT)) return -EPERM; retval = sysfs_create_mount_point(kernel_kobj, "debug"); if (retval) return retval; retval = register_filesystem(&debug_fs_type); if (retval) sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; return retval; } core_initcall(debugfs_init);
13 66 63 14 14 67 67 66 20 2 13 13 13 10 13 12 13 6 22 13 2 2 2 2 9 9 9 7 9 9 9 9 9 9 9 9 9 9 4 4 5 5 5 37 4 25 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 // SPDX-License-Identifier: GPL-2.0 /* * file.c - part of debugfs, a tiny little debug file system * * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. * * debugfs is for people to use instead of /proc or /sys. * See Documentation/filesystems/ for more details. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/io.h> #include <linux/slab.h> #include <linux/atomic.h> #include <linux/device.h> #include <linux/pm_runtime.h> #include <linux/poll.h> #include <linux/security.h> #include "internal.h" struct poll_table_struct; static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return 0; } static ssize_t default_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return count; } const struct file_operations debugfs_noop_file_operations = { .read = default_read_file, .write = default_write_file, .open = simple_open, .llseek = noop_llseek, }; #define F_DENTRY(filp) ((filp)->f_path.dentry) const struct file_operations *debugfs_real_fops(const struct file *filp) { struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) { /* * Urgh, we've been called w/o a protecting * debugfs_file_get(). */ WARN_ON(1); return NULL; } return fsd->real_fops; } EXPORT_SYMBOL_GPL(debugfs_real_fops); /** * debugfs_file_get - mark the beginning of file data access * @dentry: the dentry object whose data is being accessed. * * Up to a matching call to debugfs_file_put(), any successive call * into the file removing functions debugfs_remove() and * debugfs_remove_recursive() will block. Since associated private * file data may only get freed after a successful return of any of * the removal functions, you may safely access it after a successful * call to debugfs_file_get() without worrying about lifetime issues. * * If -%EIO is returned, the file has already been removed and thus, * it is not safe to access any of its data. If, on the other hand, * it is allowed to access the file data, zero is returned. */ int debugfs_file_get(struct dentry *dentry) { struct debugfs_fsdata *fsd; void *d_fsd; /* * This could only happen if some debugfs user erroneously calls * debugfs_file_get() on a dentry that isn't even a file, let * them know about it. */ if (WARN_ON(!d_is_reg(dentry))) return -EINVAL; d_fsd = READ_ONCE(dentry->d_fsdata); if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { fsd = d_fsd; } else { fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); if (!fsd) return -ENOMEM; if ((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT) { fsd->real_fops = NULL; fsd->short_fops = (void *)((unsigned long)d_fsd & ~(DEBUGFS_FSDATA_IS_REAL_FOPS_BIT | DEBUGFS_FSDATA_IS_SHORT_FOPS_BIT)); } else { fsd->real_fops = (void *)((unsigned long)d_fsd & ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); fsd->short_fops = NULL; } refcount_set(&fsd->active_users, 1); init_completion(&fsd->active_users_drained); INIT_LIST_HEAD(&fsd->cancellations); mutex_init(&fsd->cancellations_mtx); if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) { mutex_destroy(&fsd->cancellations_mtx); kfree(fsd); fsd = READ_ONCE(dentry->d_fsdata); } } /* * In case of a successful cmpxchg() above, this check is * strictly necessary and must follow it, see the comment in * __debugfs_remove_file(). * OTOH, if the cmpxchg() hasn't been executed or wasn't * successful, this serves the purpose of not starving * removers. */ if (d_unlinked(dentry)) return -EIO; if (!refcount_inc_not_zero(&fsd->active_users)) return -EIO; return 0; } EXPORT_SYMBOL_GPL(debugfs_file_get); /** * debugfs_file_put - mark the end of file data access * @dentry: the dentry object formerly passed to * debugfs_file_get(). * * Allow any ongoing concurrent call into debugfs_remove() or * debugfs_remove_recursive() blocked by a former call to * debugfs_file_get() to proceed and return to its caller. */ void debugfs_file_put(struct dentry *dentry) { struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); if (refcount_dec_and_test(&fsd->active_users)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_file_put); /** * debugfs_enter_cancellation - enter a debugfs cancellation * @file: the file being accessed * @cancellation: the cancellation object, the cancel callback * inside of it must be initialized * * When a debugfs file is removed it needs to wait for all active * operations to complete. However, the operation itself may need * to wait for hardware or completion of some asynchronous process * or similar. As such, it may need to be cancelled to avoid long * waits or even deadlocks. * * This function can be used inside a debugfs handler that may * need to be cancelled. As soon as this function is called, the * cancellation's 'cancel' callback may be called, at which point * the caller should proceed to call debugfs_leave_cancellation() * and leave the debugfs handler function as soon as possible. * Note that the 'cancel' callback is only ever called in the * context of some kind of debugfs_remove(). * * This function must be paired with debugfs_leave_cancellation(). */ void debugfs_enter_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); INIT_LIST_HEAD(&cancellation->list); if (WARN_ON(!d_is_reg(dentry))) return; if (WARN_ON(!cancellation->cancel)) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd || ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return; mutex_lock(&fsd->cancellations_mtx); list_add(&cancellation->list, &fsd->cancellations); mutex_unlock(&fsd->cancellations_mtx); /* if we're already removing wake it up to cancel */ if (d_unlinked(dentry)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_enter_cancellation); /** * debugfs_leave_cancellation - leave cancellation section * @file: the file being accessed * @cancellation: the cancellation previously registered with * debugfs_enter_cancellation() * * See the documentation of debugfs_enter_cancellation(). */ void debugfs_leave_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); if (WARN_ON(!d_is_reg(dentry))) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd || ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return; mutex_lock(&fsd->cancellations_mtx); if (!list_empty(&cancellation->list)) list_del(&cancellation->list); mutex_unlock(&fsd->cancellations_mtx); } EXPORT_SYMBOL_GPL(debugfs_leave_cancellation); /* * Only permit access to world-readable files when the kernel is locked down. * We also need to exclude any file that has ways to write or alter it as root * can bypass the permissions check. */ static int debugfs_locked_down(struct inode *inode, struct file *filp, const struct file_operations *real_fops) { if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && (!real_fops || (!real_fops->unlocked_ioctl && !real_fops->compat_ioctl && !real_fops->mmap))) return 0; if (security_locked_down(LOCKDOWN_DEBUGFS)) return -EPERM; return 0; } static int open_proxy_open(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = NULL; int r; r = debugfs_file_get(dentry); if (r) return r == -EIO ? -ENOENT : r; real_fops = debugfs_real_fops(filp); r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { r = -ENXIO; goto out; } #endif /* Huh? Module did not clean up after itself at exit? */ WARN(1, "debugfs file owner did not clean up at exit: %pd", dentry); r = -ENXIO; goto out; } replace_fops(filp, real_fops); if (real_fops->open) r = real_fops->open(inode, filp); out: debugfs_file_put(dentry); return r; } const struct file_operations debugfs_open_proxy_file_operations = { .open = open_proxy_open, }; #define PROTO(args...) args #define ARGS(args...) args #define FULL_PROXY_FUNC(name, ret_type, filp, proto, args) \ static ret_type full_proxy_ ## name(proto) \ { \ struct dentry *dentry = F_DENTRY(filp); \ const struct file_operations *real_fops; \ ret_type r; \ \ r = debugfs_file_get(dentry); \ if (unlikely(r)) \ return r; \ real_fops = debugfs_real_fops(filp); \ r = real_fops->name(args); \ debugfs_file_put(dentry); \ return r; \ } #define FULL_PROXY_FUNC_BOTH(name, ret_type, filp, proto, args) \ static ret_type full_proxy_ ## name(proto) \ { \ struct dentry *dentry = F_DENTRY(filp); \ struct debugfs_fsdata *fsd; \ ret_type r; \ \ r = debugfs_file_get(dentry); \ if (unlikely(r)) \ return r; \ fsd = dentry->d_fsdata; \ if (fsd->real_fops) \ r = fsd->real_fops->name(args); \ else \ r = fsd->short_fops->name(args); \ debugfs_file_put(dentry); \ return r; \ } FULL_PROXY_FUNC_BOTH(llseek, loff_t, filp, PROTO(struct file *filp, loff_t offset, int whence), ARGS(filp, offset, whence)); FULL_PROXY_FUNC_BOTH(read, ssize_t, filp, PROTO(struct file *filp, char __user *buf, size_t size, loff_t *ppos), ARGS(filp, buf, size, ppos)); FULL_PROXY_FUNC_BOTH(write, ssize_t, filp, PROTO(struct file *filp, const char __user *buf, size_t size, loff_t *ppos), ARGS(filp, buf, size, ppos)); FULL_PROXY_FUNC(unlocked_ioctl, long, filp, PROTO(struct file *filp, unsigned int cmd, unsigned long arg), ARGS(filp, cmd, arg)); static __poll_t full_proxy_poll(struct file *filp, struct poll_table_struct *wait) { struct dentry *dentry = F_DENTRY(filp); __poll_t r = 0; const struct file_operations *real_fops; if (debugfs_file_get(dentry)) return EPOLLHUP; real_fops = debugfs_real_fops(filp); r = real_fops->poll(filp, wait); debugfs_file_put(dentry); return r; } static int full_proxy_release(struct inode *inode, struct file *filp) { const struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = debugfs_real_fops(filp); const struct file_operations *proxy_fops = filp->f_op; int r = 0; /* * We must not protect this against removal races here: the * original releaser should be called unconditionally in order * not to leak any resources. Releasers must not assume that * ->i_private is still being meaningful here. */ if (real_fops && real_fops->release) r = real_fops->release(inode, filp); replace_fops(filp, d_inode(dentry)->i_fop); kfree(proxy_fops); fops_put(real_fops); return r; } static void __full_proxy_fops_init(struct file_operations *proxy_fops, struct debugfs_fsdata *fsd) { proxy_fops->release = full_proxy_release; if ((fsd->real_fops && fsd->real_fops->llseek) || (fsd->short_fops && fsd->short_fops->llseek)) proxy_fops->llseek = full_proxy_llseek; if ((fsd->real_fops && fsd->real_fops->read) || (fsd->short_fops && fsd->short_fops->read)) proxy_fops->read = full_proxy_read; if ((fsd->real_fops && fsd->real_fops->write) || (fsd->short_fops && fsd->short_fops->write)) proxy_fops->write = full_proxy_write; if (fsd->real_fops && fsd->real_fops->poll) proxy_fops->poll = full_proxy_poll; if (fsd->real_fops && fsd->real_fops->unlocked_ioctl) proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; } static int full_proxy_open(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops; struct file_operations *proxy_fops = NULL; struct debugfs_fsdata *fsd; int r; r = debugfs_file_get(dentry); if (r) return r == -EIO ? -ENOENT : r; fsd = dentry->d_fsdata; real_fops = fsd->real_fops; r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; if (real_fops && !fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { r = -ENXIO; goto out; } #endif /* Huh? Module did not cleanup after itself at exit? */ WARN(1, "debugfs file owner did not clean up at exit: %pd", dentry); r = -ENXIO; goto out; } proxy_fops = kzalloc(sizeof(*proxy_fops), GFP_KERNEL); if (!proxy_fops) { r = -ENOMEM; goto free_proxy; } __full_proxy_fops_init(proxy_fops, fsd); replace_fops(filp, proxy_fops); if (!real_fops || real_fops->open) { if (real_fops) r = real_fops->open(inode, filp); else r = simple_open(inode, filp); if (r) { replace_fops(filp, d_inode(dentry)->i_fop); goto free_proxy; } else if (filp->f_op != proxy_fops) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); goto free_proxy; } } goto out; free_proxy: kfree(proxy_fops); fops_put(real_fops); out: debugfs_file_put(dentry); return r; } const struct file_operations debugfs_full_proxy_file_operations = { .open = full_proxy_open, }; ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; ret = simple_attr_read(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } EXPORT_SYMBOL_GPL(debugfs_attr_read); static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, size_t len, loff_t *ppos, bool is_signed) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; if (is_signed) ret = simple_attr_write_signed(file, buf, len, ppos); else ret = simple_attr_write(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return debugfs_attr_write_xsigned(file, buf, len, ppos, false); } EXPORT_SYMBOL_GPL(debugfs_attr_write); ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return debugfs_attr_write_xsigned(file, buf, len, ppos, true); } EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, const struct file_operations *fops_ro, const struct file_operations *fops_wo) { /* if there are no write bits set, make read only */ if (!(mode & S_IWUGO)) return debugfs_create_file_unsafe(name, mode, parent, value, fops_ro); /* if there are no read bits set, make write only */ if (!(mode & S_IRUGO)) return debugfs_create_file_unsafe(name, mode, parent, value, fops_wo); return debugfs_create_file_unsafe(name, mode, parent, value, fops); } static int debugfs_u8_set(void *data, u64 val) { *(u8 *)data = val; return 0; } static int debugfs_u8_get(void *data, u64 *val) { *val = *(u8 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u8, &fops_u8_ro, &fops_u8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u8); static int debugfs_u16_set(void *data, u64 val) { *(u16 *)data = val; return 0; } static int debugfs_u16_get(void *data, u64 *val) { *val = *(u16 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); /** * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u16, &fops_u16_ro, &fops_u16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u16); static int debugfs_u32_set(void *data, u64 val) { *(u32 *)data = val; return 0; } static int debugfs_u32_get(void *data, u64 *val) { *val = *(u32 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); /** * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, &fops_u32_ro, &fops_u32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u32); static int debugfs_u64_set(void *data, u64 val) { *(u64 *)data = val; return 0; } static int debugfs_u64_get(void *data, u64 *val) { *val = *(u64 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); /** * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u64, &fops_u64_ro, &fops_u64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u64); static int debugfs_ulong_set(void *data, u64 val) { *(unsigned long *)data = val; return 0; } static int debugfs_ulong_get(void *data, u64 *val) { *val = *(unsigned long *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); /** * debugfs_create_ulong - create a debugfs file that is used to read and write * an unsigned long value. * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_ulong, &fops_ulong_ro, &fops_ulong_wo); } EXPORT_SYMBOL_GPL(debugfs_create_ulong); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n"); /* * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value * * These functions are exactly the same as the above functions (but use a hex * output for the decimal challenged). For details look at the above unsigned * decimal functions. */ /** * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x8, &fops_x8_ro, &fops_x8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x8); /** * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x16, &fops_x16_ro, &fops_x16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x16); /** * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x32, &fops_x32_ro, &fops_x32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x32); /** * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x64, &fops_x64_ro, &fops_x64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x64); static int debugfs_size_t_set(void *data, u64 val) { *(size_t *)data = val; return 0; } static int debugfs_size_t_get(void *data, u64 *val) { *val = *(size_t *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set, "%llu\n"); /* %llu and %zu are more or less the same */ DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n"); /** * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_size_t, &fops_size_t_ro, &fops_size_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_size_t); static int debugfs_atomic_t_set(void *data, u64 val) { atomic_set((atomic_t *)data, val); return 0; } static int debugfs_atomic_t_get(void *data, u64 *val) { *val = atomic_read((atomic_t *)data); return 0; } DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get, debugfs_atomic_t_set, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); /** * debugfs_create_atomic_t - create a debugfs file that is used to read and * write an atomic_t value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_atomic_t, &fops_atomic_t_ro, &fops_atomic_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[2]; bool val; int r; struct dentry *dentry = F_DENTRY(file); r = debugfs_file_get(dentry); if (unlikely(r)) return r; val = *(bool *)file->private_data; debugfs_file_put(dentry); if (val) buf[0] = 'Y'; else buf[0] = 'N'; buf[1] = '\n'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } EXPORT_SYMBOL_GPL(debugfs_read_file_bool); ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { bool bv; int r; bool *val = file->private_data; struct dentry *dentry = F_DENTRY(file); r = kstrtobool_from_user(user_buf, count, &bv); if (!r) { r = debugfs_file_get(dentry); if (unlikely(r)) return r; *val = bv; debugfs_file_put(dentry); } return count; } EXPORT_SYMBOL_GPL(debugfs_write_file_bool); static const struct file_operations fops_bool = { .read = debugfs_read_file_bool, .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_ro = { .read = debugfs_read_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_wo = { .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_bool, &fops_bool_ro, &fops_bool_wo); } EXPORT_SYMBOL_GPL(debugfs_create_bool); ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); char *str, *copy = NULL; int copy_len, len; ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; str = *(char **)file->private_data; len = strlen(str) + 1; copy = kmalloc(len, GFP_KERNEL); if (!copy) { debugfs_file_put(dentry); return -ENOMEM; } copy_len = strscpy(copy, str, len); debugfs_file_put(dentry); if (copy_len < 0) { kfree(copy); return copy_len; } copy[copy_len] = '\n'; ret = simple_read_from_buffer(user_buf, count, ppos, copy, len); kfree(copy); return ret; } EXPORT_SYMBOL_GPL(debugfs_create_str); static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); char *old, *new = NULL; int pos = *ppos; int r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; old = *(char **)file->private_data; /* only allow strict concatenation */ r = -EINVAL; if (pos && pos != strlen(old)) goto error; r = -E2BIG; if (pos + count + 1 > PAGE_SIZE) goto error; r = -ENOMEM; new = kmalloc(pos + count + 1, GFP_KERNEL); if (!new) goto error; if (pos) memcpy(new, old, pos); r = -EFAULT; if (copy_from_user(new + pos, user_buf, count)) goto error; new[pos + count] = '\0'; strim(new); rcu_assign_pointer(*(char __rcu **)file->private_data, new); synchronize_rcu(); kfree(old); debugfs_file_put(dentry); return count; error: kfree(new); debugfs_file_put(dentry); return r; } static const struct file_operations fops_str = { .read = debugfs_read_file_str, .write = debugfs_write_file_str, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_str_ro = { .read = debugfs_read_file_str, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_str_wo = { .write = debugfs_write_file_str, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_str - create a debugfs file that is used to read and write a string value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_str(const char *name, umode_t mode, struct dentry *parent, char **value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str, &fops_str_ro, &fops_str_wo); } static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct debugfs_blob_wrapper *blob = file->private_data; struct dentry *dentry = F_DENTRY(file); ssize_t r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; r = simple_read_from_buffer(user_buf, count, ppos, blob->data, blob->size); debugfs_file_put(dentry); return r; } static ssize_t write_file_blob(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct debugfs_blob_wrapper *blob = file->private_data; struct dentry *dentry = F_DENTRY(file); ssize_t r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; r = simple_write_to_buffer(blob->data, blob->size, ppos, user_buf, count); debugfs_file_put(dentry); return r; } static const struct file_operations fops_blob = { .read = read_file_blob, .write = write_file_blob, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_blob - create a debugfs file that is used to read and write * a binary blob * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer * to the blob data and the size of the data. * * This function creates a file in debugfs with the given name that exports * @blob->data as a binary blob. If the @mode variable is so set it can be * read from and written to. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will * be returned. */ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) { return debugfs_create_file_unsafe(name, mode & 0644, parent, blob, &fops_blob); } EXPORT_SYMBOL_GPL(debugfs_create_blob); static size_t u32_format_array(char *buf, size_t bufsize, u32 *array, int array_size) { size_t ret = 0; while (--array_size >= 0) { size_t len; char term = array_size ? ' ' : '\n'; len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; buf += len; bufsize -= len; } return ret; } static int u32_array_open(struct inode *inode, struct file *file) { struct debugfs_u32_array *data = inode->i_private; int size, elements = data->n_elements; char *buf; /* * Max size: * - 10 digits + ' '/'\n' = 11 bytes per number * - terminating NUL character */ size = elements*11; buf = kmalloc(size+1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[size] = 0; file->private_data = buf; u32_format_array(buf, size, data->array, data->n_elements); return nonseekable_open(inode, file); } static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); } static int u32_array_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static const struct file_operations u32_array_fops = { .owner = THIS_MODULE, .open = u32_array_open, .release = u32_array_release, .read = u32_array_read, }; /** * debugfs_create_u32_array - create a debugfs file that is used to read u32 * array. * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @array: wrapper struct containing data pointer and size of the array. * * This function creates a file in debugfs with the given name that exports * @array as data. If the @mode variable is so set it can be read from. * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. */ void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, struct debugfs_u32_array *array) { debugfs_create_file_unsafe(name, mode, parent, array, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); #ifdef CONFIG_HAS_IOMEM /* * The regset32 stuff is used to print 32-bit registers using the * seq_file utilities. We offer printing a register set in an already-opened * sequential file or create a debugfs file that only prints a regset32. */ /** * debugfs_print_regs32 - use seq_print to describe a set of registers * @s: the seq_file structure being used to generate output * @regs: an array if struct debugfs_reg32 structures * @nregs: the length of the above array * @base: the base address to be used in reading the registers * @prefix: a string to be prefixed to every output line * * This function outputs a text block describing the current values of * some 32-bit hardware registers. It is meant to be used within debugfs * files based on seq_file that need to show registers, intermixed with other * information. The prefix argument may be used to specify a leading string, * because some peripherals have several blocks of identical registers, * for example configuration of dma channels */ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix) { int i; for (i = 0; i < nregs; i++, regs++) { if (prefix) seq_printf(s, "%s", prefix); seq_printf(s, "%s = 0x%08x\n", regs->name, readl(base + regs->offset)); if (seq_has_overflowed(s)) break; } } EXPORT_SYMBOL_GPL(debugfs_print_regs32); static int debugfs_regset32_show(struct seq_file *s, void *data) { struct debugfs_regset32 *regset = s->private; if (regset->dev) pm_runtime_get_sync(regset->dev); debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, ""); if (regset->dev) pm_runtime_put(regset->dev); return 0; } DEFINE_SHOW_ATTRIBUTE(debugfs_regset32); /** * debugfs_create_regset32 - create a debugfs file that returns register values * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @regset: a pointer to a struct debugfs_regset32, which contains a pointer * to an array of register definitions, the array size and the base * address where the register bank is to be found. * * This function creates a file in debugfs with the given name that reports * the names and values of a set of 32-bit registers. If the @mode variable * is so set it can be read from. Writing is not supported. */ void debugfs_create_regset32(const char *name, umode_t mode, struct dentry *parent, struct debugfs_regset32 *regset) { debugfs_create_file(name, mode, parent, regset, &debugfs_regset32_fops); } EXPORT_SYMBOL_GPL(debugfs_create_regset32); #endif /* CONFIG_HAS_IOMEM */ struct debugfs_devm_entry { int (*read)(struct seq_file *seq, void *data); struct device *dev; }; static int debugfs_devm_entry_open(struct inode *inode, struct file *f) { struct debugfs_devm_entry *entry = inode->i_private; return single_open(f, entry->read, entry->dev); } static const struct file_operations debugfs_devm_entry_ops = { .owner = THIS_MODULE, .open = debugfs_devm_entry_open, .release = single_release, .read = seq_read, .llseek = seq_lseek }; /** * debugfs_create_devm_seqfile - create a debugfs file that is bound to device. * * @dev: device related to this debugfs file. * @name: name of the debugfs file. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @read_fn: function pointer called to print the seq_file content. */ void debugfs_create_devm_seqfile(struct device *dev, const char *name, struct dentry *parent, int (*read_fn)(struct seq_file *s, void *data)) { struct debugfs_devm_entry *entry; if (IS_ERR(parent)) return; entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL); if (!entry) return; entry->read = read_fn; entry->dev = dev; debugfs_create_file(name, S_IRUGO, parent, entry, &debugfs_devm_entry_ops); } EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 /* Block- or MTD-based romfs * * Copyright © 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Derived from: ROMFS file system, Linux implementation * * Copyright © 1997-1999 Janos Farkas <chexum@shadow.banki.hu> * * Using parts of the minix filesystem * Copyright © 1991, 1992 Linus Torvalds * * and parts of the affs filesystem additionally * Copyright © 1993 Ray Burr * Copyright © 1996 Hans-Joachim Widmaier * * Changes * Changed for 2.1.19 modules * Jan 1997 Initial release * Jun 1997 2.1.43+ changes * Proper page locking in read_folio * Changed to work with 2.1.45+ fs * Jul 1997 Fixed follow_link * 2.1.47 * lookup shouldn't return -ENOENT * from Horst von Brand: * fail on wrong checksum * double unlock_super was possible * correct namelen for statfs * spotted by Bill Hawes: * readlink shouldn't iput() * Jun 1998 2.1.106 from Avery Pennarun: glibc scandir() * exposed a problem in readdir * 2.1.107 code-freeze spellchecker run * Aug 1998 2.1.118+ VFS changes * Sep 1998 2.1.122 another VFS change (follow_link) * Apr 1999 2.2.7 no more EBADF checking in * lookup/readdir, use ERR_PTR * Jun 1999 2.3.6 d_alloc_root use changed * 2.3.9 clean up usage of ENOENT/negative * dentries in lookup * clean up page flags setting * (error, uptodate, locking) in * in read_folio * use init_special_inode for * fifos/sockets (and streamline) in * read_inode, fix _ops table order * Aug 1999 2.3.16 __initfunc() => __init change * Oct 1999 2.3.24 page->owner hack obsoleted * Nov 1999 2.3.27 2.3.25+ page->offset => index change * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> #include <linux/time.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/fs_context.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/statfs.h> #include <linux/mtd/super.h> #include <linux/ctype.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/uaccess.h> #include <linux/major.h> #include "internal.h" static struct kmem_cache *romfs_inode_cachep; static const umode_t romfs_modemap[8] = { 0, /* hard link */ S_IFDIR | 0644, /* directory */ S_IFREG | 0644, /* regular file */ S_IFLNK | 0777, /* symlink */ S_IFBLK | 0600, /* blockdev */ S_IFCHR | 0600, /* chardev */ S_IFSOCK | 0644, /* socket */ S_IFIFO | 0644 /* FIFO */ }; static const unsigned char romfs_dtype_table[] = { DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO }; static struct inode *romfs_iget(struct super_block *sb, unsigned long pos); /* * read a page worth of data from the image */ static int romfs_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; loff_t offset, size; unsigned long fillsize, pos; void *buf; int ret; buf = kmap_local_folio(folio, 0); offset = folio_pos(folio); size = i_size_read(inode); fillsize = 0; ret = 0; if (offset < size) { size -= offset; fillsize = size > PAGE_SIZE ? PAGE_SIZE : size; pos = ROMFS_I(inode)->i_dataoffset + offset; ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize); if (ret < 0) { fillsize = 0; ret = -EIO; } } buf = folio_zero_tail(folio, fillsize, buf + fillsize); kunmap_local(buf); folio_end_read(folio, ret == 0); return ret; } static const struct address_space_operations romfs_aops = { .read_folio = romfs_read_folio }; /* * read the entries from a directory */ static int romfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *i = file_inode(file); struct romfs_inode ri; unsigned long offset, maxoff; int j, ino, nextfh; char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ int ret; maxoff = romfs_maxsize(i->i_sb); offset = ctx->pos; if (!offset) { offset = i->i_ino & ROMFH_MASK; ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); if (ret < 0) goto out; offset = be32_to_cpu(ri.spec) & ROMFH_MASK; } /* Not really failsafe, but we are read-only... */ for (;;) { if (!offset || offset >= maxoff) { offset = maxoff; ctx->pos = offset; goto out; } ctx->pos = offset; /* Fetch inode info */ ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); if (ret < 0) goto out; j = romfs_dev_strnlen(i->i_sb, offset + ROMFH_SIZE, sizeof(fsname) - 1); if (j < 0) goto out; ret = romfs_dev_read(i->i_sb, offset + ROMFH_SIZE, fsname, j); if (ret < 0) goto out; fsname[j] = '\0'; ino = offset; nextfh = be32_to_cpu(ri.next); if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) ino = be32_to_cpu(ri.spec); if (!dir_emit(ctx, fsname, j, ino, romfs_dtype_table[nextfh & ROMFH_TYPE])) goto out; offset = nextfh & ROMFH_MASK; } out: return 0; } /* * look up an entry in a directory */ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned long offset, maxoff; struct inode *inode = NULL; struct romfs_inode ri; const char *name; /* got from dentry */ int len, ret; offset = dir->i_ino & ROMFH_MASK; ret = romfs_dev_read(dir->i_sb, offset, &ri, ROMFH_SIZE); if (ret < 0) goto error; /* search all the file entries in the list starting from the one * pointed to by the directory's special data */ maxoff = romfs_maxsize(dir->i_sb); offset = be32_to_cpu(ri.spec) & ROMFH_MASK; name = dentry->d_name.name; len = dentry->d_name.len; for (;;) { if (!offset || offset >= maxoff) break; ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri)); if (ret < 0) goto error; /* try to match the first 16 bytes of name */ ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name, len); if (ret < 0) goto error; if (ret == 1) { /* Hard link handling */ if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) offset = be32_to_cpu(ri.spec) & ROMFH_MASK; inode = romfs_iget(dir->i_sb, offset); break; } /* next entry */ offset = be32_to_cpu(ri.next) & ROMFH_MASK; } return d_splice_alias(inode, dentry); error: return ERR_PTR(ret); } static const struct file_operations romfs_dir_operations = { .read = generic_read_dir, .iterate_shared = romfs_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations romfs_dir_inode_operations = { .lookup = romfs_lookup, }; /* * get a romfs inode based on its position in the image (which doubles as the * inode number) */ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) { struct romfs_inode_info *inode; struct romfs_inode ri; struct inode *i; unsigned long nlen; unsigned nextfh; int ret; umode_t mode; /* we might have to traverse a chain of "hard link" file entries to get * to the actual file */ for (;;) { ret = romfs_dev_read(sb, pos, &ri, sizeof(ri)); if (ret < 0) goto error; /* XXX: do romfs_checksum here too (with name) */ nextfh = be32_to_cpu(ri.next); if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) break; pos = be32_to_cpu(ri.spec) & ROMFH_MASK; } /* determine the length of the filename */ nlen = romfs_dev_strnlen(sb, pos + ROMFH_SIZE, ROMFS_MAXFN); if (IS_ERR_VALUE(nlen)) goto eio; /* get an inode for this image position */ i = iget_locked(sb, pos); if (!i) return ERR_PTR(-ENOMEM); if (!(i->i_state & I_NEW)) return i; /* precalculate the data offset */ inode = ROMFS_I(i); inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; inode->i_dataoffset = pos + inode->i_metasize; set_nlink(i, 1); /* Hard to decide.. */ i->i_size = be32_to_cpu(ri.size); inode_set_mtime_to_ts(i, inode_set_atime_to_ts(i, inode_set_ctime(i, 0, 0))); /* set up mode and ops */ mode = romfs_modemap[nextfh & ROMFH_TYPE]; switch (nextfh & ROMFH_TYPE) { case ROMFH_DIR: i->i_size = ROMFS_I(i)->i_metasize; i->i_op = &romfs_dir_inode_operations; i->i_fop = &romfs_dir_operations; if (nextfh & ROMFH_EXEC) mode |= S_IXUGO; break; case ROMFH_REG: i->i_fop = &romfs_ro_fops; i->i_data.a_ops = &romfs_aops; if (nextfh & ROMFH_EXEC) mode |= S_IXUGO; break; case ROMFH_SYM: i->i_op = &page_symlink_inode_operations; inode_nohighmem(i); i->i_data.a_ops = &romfs_aops; mode |= S_IRWXUGO; break; default: /* depending on MBZ for sock/fifos */ nextfh = be32_to_cpu(ri.spec); init_special_inode(i, mode, MKDEV(nextfh >> 16, nextfh & 0xffff)); break; } i->i_mode = mode; i->i_blocks = (i->i_size + 511) >> 9; unlock_new_inode(i); return i; eio: ret = -EIO; error: pr_err("read error for inode 0x%lx\n", pos); return ERR_PTR(ret); } /* * allocate a new inode */ static struct inode *romfs_alloc_inode(struct super_block *sb) { struct romfs_inode_info *inode; inode = alloc_inode_sb(sb, romfs_inode_cachep, GFP_KERNEL); return inode ? &inode->vfs_inode : NULL; } /* * return a spent inode to the slab cache */ static void romfs_free_inode(struct inode *inode) { kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } /* * get filesystem statistics */ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = 0; /* When calling huge_encode_dev(), * use sb->s_bdev->bd_dev when, * - CONFIG_ROMFS_ON_BLOCK defined * use sb->s_dev when, * - CONFIG_ROMFS_ON_BLOCK undefined and * - CONFIG_ROMFS_ON_MTD defined * leave id as 0 when, * - CONFIG_ROMFS_ON_BLOCK undefined and * - CONFIG_ROMFS_ON_MTD undefined */ if (sb->s_bdev) id = huge_encode_dev(sb->s_bdev->bd_dev); else if (sb->s_dev) id = huge_encode_dev(sb->s_dev); buf->f_type = ROMFS_MAGIC; buf->f_namelen = ROMFS_MAXFN; buf->f_bsize = ROMBSIZE; buf->f_bfree = buf->f_bavail = buf->f_ffree; buf->f_blocks = (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; buf->f_fsid = u64_to_fsid(id); return 0; } /* * remounting must involve read-only */ static int romfs_reconfigure(struct fs_context *fc) { sync_filesystem(fc->root->d_sb); fc->sb_flags |= SB_RDONLY; return 0; } static const struct super_operations romfs_super_ops = { .alloc_inode = romfs_alloc_inode, .free_inode = romfs_free_inode, .statfs = romfs_statfs, }; /* * checksum check on part of a romfs filesystem */ static __u32 romfs_checksum(const void *data, int size) { const __be32 *ptr = data; __u32 sum; sum = 0; size >>= 2; while (size > 0) { sum += be32_to_cpu(*ptr++); size--; } return sum; } /* * fill in the superblock */ static int romfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct romfs_super_block *rsb; struct inode *root; unsigned long pos, img_size; const char *storage; size_t len; int ret; #ifdef CONFIG_BLOCK if (!sb->s_mtd) { sb_set_blocksize(sb, ROMBSIZE); } else { sb->s_blocksize = ROMBSIZE; sb->s_blocksize_bits = blksize_bits(ROMBSIZE); } #endif sb->s_maxbytes = 0xFFFFFFFF; sb->s_magic = ROMFS_MAGIC; sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_time_min = 0; sb->s_time_max = 0; sb->s_op = &romfs_super_ops; #ifdef CONFIG_ROMFS_ON_MTD /* Use same dev ID from the underlying mtdblock device */ if (sb->s_mtd) sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); #endif /* read the image superblock and check it */ rsb = kmalloc(512, GFP_KERNEL); if (!rsb) return -ENOMEM; sb->s_fs_info = (void *) 512; ret = romfs_dev_read(sb, 0, rsb, 512); if (ret < 0) goto error_rsb; img_size = be32_to_cpu(rsb->size); if (sb->s_mtd && img_size > sb->s_mtd->size) goto error_rsb_inval; sb->s_fs_info = (void *) img_size; if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 || img_size < ROMFH_SIZE) { if (!(fc->sb_flags & SB_SILENT)) errorf(fc, "VFS: Can't find a romfs filesystem on dev %s.\n", sb->s_id); goto error_rsb_inval; } if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) { pr_err("bad initial checksum on dev %s.\n", sb->s_id); goto error_rsb_inval; } storage = sb->s_mtd ? "MTD" : "the block layer"; len = strnlen(rsb->name, ROMFS_MAXFN); if (!(fc->sb_flags & SB_SILENT)) pr_notice("Mounting image '%*.*s' through %s\n", (unsigned) len, (unsigned) len, rsb->name, storage); kfree(rsb); rsb = NULL; /* find the root directory */ pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; root = romfs_iget(sb, pos); if (IS_ERR(root)) return PTR_ERR(root); sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; return 0; error_rsb_inval: ret = -EINVAL; error_rsb: kfree(rsb); return ret; } /* * get a superblock for mounting */ static int romfs_get_tree(struct fs_context *fc) { int ret = -EINVAL; #ifdef CONFIG_ROMFS_ON_MTD ret = get_tree_mtd(fc, romfs_fill_super); #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (ret == -EINVAL) ret = get_tree_bdev(fc, romfs_fill_super); #endif return ret; } static const struct fs_context_operations romfs_context_ops = { .get_tree = romfs_get_tree, .reconfigure = romfs_reconfigure, }; /* * Set up the filesystem mount context. */ static int romfs_init_fs_context(struct fs_context *fc) { fc->ops = &romfs_context_ops; return 0; } /* * destroy a romfs superblock in the appropriate manner */ static void romfs_kill_sb(struct super_block *sb) { generic_shutdown_super(sb); #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) { put_mtd_device(sb->s_mtd); sb->s_mtd = NULL; } #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) { sync_blockdev(sb->s_bdev); bdev_fput(sb->s_bdev_file); } #endif } static struct file_system_type romfs_fs_type = { .owner = THIS_MODULE, .name = "romfs", .init_fs_context = romfs_init_fs_context, .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("romfs"); /* * inode storage initialiser */ static void romfs_i_init_once(void *_inode) { struct romfs_inode_info *inode = _inode; inode_init_once(&inode->vfs_inode); } /* * romfs module initialisation */ static int __init init_romfs_fs(void) { int ret; pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n"); romfs_inode_cachep = kmem_cache_create("romfs_i", sizeof(struct romfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, romfs_i_init_once); if (!romfs_inode_cachep) { pr_err("Failed to initialise inode cache\n"); return -ENOMEM; } ret = register_filesystem(&romfs_fs_type); if (ret) { pr_err("Failed to register filesystem\n"); goto error_register; } return 0; error_register: kmem_cache_destroy(romfs_inode_cachep); return ret; } /* * romfs module removal */ static void __exit exit_romfs_fs(void) { unregister_filesystem(&romfs_fs_type); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(romfs_inode_cachep); } module_init(init_romfs_fs); module_exit(exit_romfs_fs); MODULE_DESCRIPTION("Direct-MTD Capable RomFS"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); /* Actually dual-licensed, but it doesn't matter for */
44 1 40 18 1 23 7 7 7 2 1 4 4 6 6 19 19 17 2 7 7 1 21 21 22 23 23 2 19 44 44 41 23 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue> * * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. */ #include <linux/skbuff.h> #include <net/ipv6.h> #include <net/mld.h> #include <net/addrconf.h> #include <net/ip6_checksum.h> static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) { const struct ipv6hdr *ip6h; unsigned int len; unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); if (!pskb_may_pull(skb, offset)) return -EINVAL; ip6h = ipv6_hdr(skb); if (ip6h->version != 6) return -EINVAL; len = offset + ntohs(ip6h->payload_len); if (skb->len < len || len <= offset) return -EINVAL; skb_set_transport_header(skb, offset); return 0; } static int ipv6_mc_check_exthdrs(struct sk_buff *skb) { const struct ipv6hdr *ip6h; int offset; u8 nexthdr; __be16 frag_off; ip6h = ipv6_hdr(skb); if (ip6h->nexthdr != IPPROTO_HOPOPTS) return -ENOMSG; nexthdr = ip6h->nexthdr; offset = skb_network_offset(skb) + sizeof(*ip6h); offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); if (offset < 0) return -EINVAL; if (nexthdr != IPPROTO_ICMPV6) return -ENOMSG; skb_set_transport_header(skb, offset); return 0; } static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb); len += sizeof(struct mld2_report); return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ipv6_mc_check_mld_query(struct sk_buff *skb) { unsigned int transport_len = ipv6_transport_len(skb); struct mld_msg *mld; unsigned int len; /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; /* MLDv1? */ if (transport_len != sizeof(struct mld_msg)) { /* or MLDv2? */ if (transport_len < sizeof(struct mld2_query)) return -EINVAL; len = skb_transport_offset(skb) + sizeof(struct mld2_query); if (!ipv6_mc_may_pull(skb, len)) return -EINVAL; } mld = (struct mld_msg *)skb_transport_header(skb); /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer * all-nodes destination address (ff02::1) for general queries */ if (ipv6_addr_any(&mld->mld_mca) && !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) return -EINVAL; return 0; } static int ipv6_mc_check_mld_msg(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); struct mld_msg *mld; if (!ipv6_mc_may_pull(skb, len)) return -ENODATA; mld = (struct mld_msg *)skb_transport_header(skb); switch (mld->mld_type) { case ICMPV6_MGM_REDUCTION: case ICMPV6_MGM_REPORT: return 0; case ICMPV6_MLD2_REPORT: return ipv6_mc_check_mld_reportv2(skb); case ICMPV6_MGM_QUERY: return ipv6_mc_check_mld_query(skb); default: return -ENODATA; } } static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) { return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } static int ipv6_mc_check_icmpv6(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr); unsigned int transport_len = ipv6_transport_len(skb); struct sk_buff *skb_chk; if (!ipv6_mc_may_pull(skb, len)) return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) return -EINVAL; if (skb_chk != skb) kfree_skb(skb_chk); return 0; } /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet * @skb: the skb to validate * * Checks whether an IPv6 packet is a valid MLD packet. If so sets * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard * -ENOMSG: IP header validation succeeded but it is not an ICMPv6 packet * with a hop-by-hop option. * -ENODATA: IP+ICMPv6 header with hop-by-hop option validation succeeded * but it is not an MLD packet. * -ENOMEM: A memory allocation failure happened. * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ int ipv6_mc_check_mld(struct sk_buff *skb) { int ret; ret = ipv6_mc_check_ip6hdr(skb); if (ret < 0) return ret; ret = ipv6_mc_check_exthdrs(skb); if (ret < 0) return ret; ret = ipv6_mc_check_icmpv6(skb); if (ret < 0) return ret; return ipv6_mc_check_mld_msg(skb); } EXPORT_SYMBOL(ipv6_mc_check_mld);
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Freecom USB/IDE adaptor * * Freecom v0.1: * * First release * * Current development and maintenance by: * (C) 2000 David Brown <usb-storage@davidb.org> * * This driver was developed with information provided in FREECOM's USB * Programmers Reference Guide. For further information contact Freecom * (https://www.freecom.de/) */ #include <linux/module.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-freecom" MODULE_DESCRIPTION("Driver for Freecom USB/IDE adaptor"); MODULE_AUTHOR("David Brown <usb-storage@davidb.org>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); #ifdef CONFIG_USB_STORAGE_DEBUG static void pdump(struct us_data *us, void *ibuffer, int length); #endif /* Bits of HD_STATUS */ #define ERR_STAT 0x01 #define DRQ_STAT 0x08 /* All of the outgoing packets are 64 bytes long. */ struct freecom_cb_wrap { u8 Type; /* Command type. */ u8 Timeout; /* Timeout in seconds. */ u8 Atapi[12]; /* An ATAPI packet. */ u8 Filler[50]; /* Padding Data. */ }; struct freecom_xfer_wrap { u8 Type; /* Command type. */ u8 Timeout; /* Timeout in seconds. */ __le32 Count; /* Number of bytes to transfer. */ u8 Pad[58]; } __attribute__ ((packed)); struct freecom_ide_out { u8 Type; /* Type + IDE register. */ u8 Pad; __le16 Value; /* Value to write. */ u8 Pad2[60]; }; struct freecom_ide_in { u8 Type; /* Type | IDE register. */ u8 Pad[63]; }; struct freecom_status { u8 Status; u8 Reason; __le16 Count; u8 Pad[60]; }; /* * Freecom stuffs the interrupt status in the INDEX_STAT bit of the ide * register. */ #define FCM_INT_STATUS 0x02 /* INDEX_STAT */ #define FCM_STATUS_BUSY 0x80 /* * These are the packet types. The low bit indicates that this command * should wait for an interrupt. */ #define FCM_PACKET_ATAPI 0x21 #define FCM_PACKET_STATUS 0x20 /* * Receive data from the IDE interface. The ATAPI packet has already * waited, so the data should be immediately available. */ #define FCM_PACKET_INPUT 0x81 /* Send data to the IDE interface. */ #define FCM_PACKET_OUTPUT 0x01 /* * Write a value to an ide register. Or the ide register to write after * munging the address a bit. */ #define FCM_PACKET_IDE_WRITE 0x40 #define FCM_PACKET_IDE_READ 0xC0 /* All packets (except for status) are 64 bytes long. */ #define FCM_PACKET_LENGTH 64 #define FCM_STATUS_PACKET_LENGTH 4 static int init_freecom(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id freecom_usb_ids[] = { # include "unusual_freecom.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, freecom_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev freecom_unusual_dev_list[] = { # include "unusual_freecom.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV static int freecom_readdata (struct scsi_cmnd *srb, struct us_data *us, unsigned int ipipe, unsigned int opipe, int count) { struct freecom_xfer_wrap *fxfr = (struct freecom_xfer_wrap *) us->iobuf; int result; fxfr->Type = FCM_PACKET_INPUT | 0x00; fxfr->Timeout = 0; /* Short timeout for debugging. */ fxfr->Count = cpu_to_le32 (count); memset (fxfr->Pad, 0, sizeof (fxfr->Pad)); usb_stor_dbg(us, "Read data Freecom! (c=%d)\n", count); /* Issue the transfer command. */ result = usb_stor_bulk_transfer_buf (us, opipe, fxfr, FCM_PACKET_LENGTH, NULL); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Freecom readdata transport error\n"); return USB_STOR_TRANSPORT_ERROR; } /* Now transfer all of our blocks. */ usb_stor_dbg(us, "Start of read\n"); result = usb_stor_bulk_srb(us, ipipe, srb); usb_stor_dbg(us, "freecom_readdata done!\n"); if (result > USB_STOR_XFER_SHORT) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } static int freecom_writedata (struct scsi_cmnd *srb, struct us_data *us, int unsigned ipipe, unsigned int opipe, int count) { struct freecom_xfer_wrap *fxfr = (struct freecom_xfer_wrap *) us->iobuf; int result; fxfr->Type = FCM_PACKET_OUTPUT | 0x00; fxfr->Timeout = 0; /* Short timeout for debugging. */ fxfr->Count = cpu_to_le32 (count); memset (fxfr->Pad, 0, sizeof (fxfr->Pad)); usb_stor_dbg(us, "Write data Freecom! (c=%d)\n", count); /* Issue the transfer command. */ result = usb_stor_bulk_transfer_buf (us, opipe, fxfr, FCM_PACKET_LENGTH, NULL); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Freecom writedata transport error\n"); return USB_STOR_TRANSPORT_ERROR; } /* Now transfer all of our blocks. */ usb_stor_dbg(us, "Start of write\n"); result = usb_stor_bulk_srb(us, opipe, srb); usb_stor_dbg(us, "freecom_writedata done!\n"); if (result > USB_STOR_XFER_SHORT) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } /* * Transport for the Freecom USB/IDE adaptor. * */ static int freecom_transport(struct scsi_cmnd *srb, struct us_data *us) { struct freecom_cb_wrap *fcb; struct freecom_status *fst; unsigned int ipipe, opipe; /* We need both pipes. */ int result; unsigned int partial; int length; fcb = (struct freecom_cb_wrap *) us->iobuf; fst = (struct freecom_status *) us->iobuf; usb_stor_dbg(us, "Freecom TRANSPORT STARTED\n"); /* Get handles for both transports. */ opipe = us->send_bulk_pipe; ipipe = us->recv_bulk_pipe; /* The ATAPI Command always goes out first. */ fcb->Type = FCM_PACKET_ATAPI | 0x00; fcb->Timeout = 0; memcpy (fcb->Atapi, srb->cmnd, 12); memset (fcb->Filler, 0, sizeof (fcb->Filler)); US_DEBUG(pdump(us, srb->cmnd, 12)); /* Send it out. */ result = usb_stor_bulk_transfer_buf (us, opipe, fcb, FCM_PACKET_LENGTH, NULL); /* * The Freecom device will only fail if there is something wrong in * USB land. It returns the status in its own registers, which * come back in the bulk pipe. */ if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "freecom transport error\n"); return USB_STOR_TRANSPORT_ERROR; } /* * There are times we can optimize out this status read, but it * doesn't hurt us to always do it now. */ result = usb_stor_bulk_transfer_buf (us, ipipe, fst, FCM_STATUS_PACKET_LENGTH, &partial); usb_stor_dbg(us, "foo Status result %d %u\n", result, partial); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; US_DEBUG(pdump(us, (void *)fst, partial)); /* * The firmware will time-out commands after 20 seconds. Some commands * can legitimately take longer than this, so we use a different * command that only waits for the interrupt and then sends status, * without having to send a new ATAPI command to the device. * * NOTE: There is some indication that a data transfer after a timeout * may not work, but that is a condition that should never happen. */ while (fst->Status & FCM_STATUS_BUSY) { usb_stor_dbg(us, "20 second USB/ATAPI bridge TIMEOUT occurred!\n"); usb_stor_dbg(us, "fst->Status is %x\n", fst->Status); /* Get the status again */ fcb->Type = FCM_PACKET_STATUS; fcb->Timeout = 0; memset (fcb->Atapi, 0, sizeof(fcb->Atapi)); memset (fcb->Filler, 0, sizeof (fcb->Filler)); /* Send it out. */ result = usb_stor_bulk_transfer_buf (us, opipe, fcb, FCM_PACKET_LENGTH, NULL); /* * The Freecom device will only fail if there is something * wrong in USB land. It returns the status in its own * registers, which come back in the bulk pipe. */ if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "freecom transport error\n"); return USB_STOR_TRANSPORT_ERROR; } /* get the data */ result = usb_stor_bulk_transfer_buf (us, ipipe, fst, FCM_STATUS_PACKET_LENGTH, &partial); usb_stor_dbg(us, "bar Status result %d %u\n", result, partial); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; US_DEBUG(pdump(us, (void *)fst, partial)); } if (partial != 4) return USB_STOR_TRANSPORT_ERROR; if ((fst->Status & 1) != 0) { usb_stor_dbg(us, "operation failed\n"); return USB_STOR_TRANSPORT_FAILED; } /* * The device might not have as much data available as we * requested. If you ask for more than the device has, this reads * and such will hang. */ usb_stor_dbg(us, "Device indicates that it has %d bytes available\n", le16_to_cpu(fst->Count)); usb_stor_dbg(us, "SCSI requested %d\n", scsi_bufflen(srb)); /* Find the length we desire to read. */ switch (srb->cmnd[0]) { case INQUIRY: case REQUEST_SENSE: /* 16 or 18 bytes? spec says 18, lots of devices only have 16 */ case MODE_SENSE: case MODE_SENSE_10: length = le16_to_cpu(fst->Count); break; default: length = scsi_bufflen(srb); } /* verify that this amount is legal */ if (length > scsi_bufflen(srb)) { length = scsi_bufflen(srb); usb_stor_dbg(us, "Truncating request to match buffer length: %d\n", length); } /* * What we do now depends on what direction the data is supposed to * move in. */ switch (us->srb->sc_data_direction) { case DMA_FROM_DEVICE: /* catch bogus "read 0 length" case */ if (!length) break; /* * Make sure that the status indicates that the device * wants data as well. */ if ((fst->Status & DRQ_STAT) == 0 || (fst->Reason & 3) != 2) { usb_stor_dbg(us, "SCSI wants data, drive doesn't have any\n"); return USB_STOR_TRANSPORT_FAILED; } result = freecom_readdata (srb, us, ipipe, opipe, length); if (result != USB_STOR_TRANSPORT_GOOD) return result; usb_stor_dbg(us, "Waiting for status\n"); result = usb_stor_bulk_transfer_buf (us, ipipe, fst, FCM_PACKET_LENGTH, &partial); US_DEBUG(pdump(us, (void *)fst, partial)); if (partial != 4 || result > USB_STOR_XFER_SHORT) return USB_STOR_TRANSPORT_ERROR; if ((fst->Status & ERR_STAT) != 0) { usb_stor_dbg(us, "operation failed\n"); return USB_STOR_TRANSPORT_FAILED; } if ((fst->Reason & 3) != 3) { usb_stor_dbg(us, "Drive seems still hungry\n"); return USB_STOR_TRANSPORT_FAILED; } usb_stor_dbg(us, "Transfer happy\n"); break; case DMA_TO_DEVICE: /* catch bogus "write 0 length" case */ if (!length) break; /* * Make sure the status indicates that the device wants to * send us data. */ /* !!IMPLEMENT!! */ result = freecom_writedata (srb, us, ipipe, opipe, length); if (result != USB_STOR_TRANSPORT_GOOD) return result; usb_stor_dbg(us, "Waiting for status\n"); result = usb_stor_bulk_transfer_buf (us, ipipe, fst, FCM_PACKET_LENGTH, &partial); if (partial != 4 || result > USB_STOR_XFER_SHORT) return USB_STOR_TRANSPORT_ERROR; if ((fst->Status & ERR_STAT) != 0) { usb_stor_dbg(us, "operation failed\n"); return USB_STOR_TRANSPORT_FAILED; } if ((fst->Reason & 3) != 3) { usb_stor_dbg(us, "Drive seems still hungry\n"); return USB_STOR_TRANSPORT_FAILED; } usb_stor_dbg(us, "Transfer happy\n"); break; case DMA_NONE: /* Easy, do nothing. */ break; default: /* should never hit here -- filtered in usb.c */ usb_stor_dbg(us, "freecom unimplemented direction: %d\n", us->srb->sc_data_direction); /* Return fail, SCSI seems to handle this better. */ return USB_STOR_TRANSPORT_FAILED; } return USB_STOR_TRANSPORT_GOOD; } static int init_freecom(struct us_data *us) { int result; char *buffer = us->iobuf; /* * The DMA-mapped I/O buffer is 64 bytes long, just right for * all our packets. No need to allocate any extra buffer space. */ result = usb_stor_control_msg(us, us->recv_ctrl_pipe, 0x4c, 0xc0, 0x4346, 0x0, buffer, 0x20, 3*HZ); buffer[32] = '\0'; usb_stor_dbg(us, "String returned from FC init is: %s\n", buffer); /* * Special thanks to the people at Freecom for providing me with * this "magic sequence", which they use in their Windows and MacOS * drivers to make sure that all the attached perhiperals are * properly reset. */ /* send reset */ result = usb_stor_control_msg(us, us->send_ctrl_pipe, 0x4d, 0x40, 0x24d8, 0x0, NULL, 0x0, 3*HZ); usb_stor_dbg(us, "result from activate reset is %d\n", result); /* wait 250ms */ msleep(250); /* clear reset */ result = usb_stor_control_msg(us, us->send_ctrl_pipe, 0x4d, 0x40, 0x24f8, 0x0, NULL, 0x0, 3*HZ); usb_stor_dbg(us, "result from clear reset is %d\n", result); /* wait 3 seconds */ msleep(3 * 1000); return USB_STOR_TRANSPORT_GOOD; } static int usb_stor_freecom_reset(struct us_data *us) { printk (KERN_CRIT "freecom reset called\n"); /* We don't really have this feature. */ return FAILED; } #ifdef CONFIG_USB_STORAGE_DEBUG static void pdump(struct us_data *us, void *ibuffer, int length) { static char line[80]; int offset = 0; unsigned char *buffer = (unsigned char *) ibuffer; int i, j; int from, base; offset = 0; for (i = 0; i < length; i++) { if ((i & 15) == 0) { if (i > 0) { offset += sprintf (line+offset, " - "); for (j = i - 16; j < i; j++) { if (buffer[j] >= 32 && buffer[j] <= 126) line[offset++] = buffer[j]; else line[offset++] = '.'; } line[offset] = 0; usb_stor_dbg(us, "%s\n", line); offset = 0; } offset += sprintf (line+offset, "%08x:", i); } else if ((i & 7) == 0) { offset += sprintf (line+offset, " -"); } offset += sprintf (line+offset, " %02x", buffer[i] & 0xff); } /* Add the last "chunk" of data. */ from = (length - 1) % 16; base = ((length - 1) / 16) * 16; for (i = from + 1; i < 16; i++) offset += sprintf (line+offset, " "); if (from < 8) offset += sprintf (line+offset, " "); offset += sprintf (line+offset, " - "); for (i = 0; i <= from; i++) { if (buffer[base+i] >= 32 && buffer[base+i] <= 126) line[offset++] = buffer[base+i]; else line[offset++] = '.'; } line[offset] = 0; usb_stor_dbg(us, "%s\n", line); } #endif static struct scsi_host_template freecom_host_template; static int freecom_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - freecom_usb_ids) + freecom_unusual_dev_list, &freecom_host_template); if (result) return result; us->transport_name = "Freecom"; us->transport = freecom_transport; us->transport_reset = usb_stor_freecom_reset; us->max_lun = 0; result = usb_stor_probe2(us); return result; } static struct usb_driver freecom_driver = { .name = DRV_NAME, .probe = freecom_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = freecom_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(freecom_driver, freecom_host_template, DRV_NAME);
7213 123 7101 7212 7213 79 7209 79 7210 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/lockdep.h> #include <linux/printk.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct serial_port_device; struct device; struct gpio_desc; /** * struct uart_ops -- interface between serial_core and the driver * * This structure describes all the operations that can be done on the * physical hardware. * * @tx_empty: ``unsigned int ()(struct uart_port *port)`` * * This function tests whether the transmitter fifo and shifter for the * @port is empty. If it is empty, this function should return * %TIOCSER_TEMT, otherwise return 0. If the port does not support this * operation, then it should return %TIOCSER_TEMT. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @set_mctrl: ``void ()(struct uart_port *port, unsigned int mctrl)`` * * This function sets the modem control lines for @port to the state * described by @mctrl. The relevant bits of @mctrl are: * * - %TIOCM_RTS RTS signal. * - %TIOCM_DTR DTR signal. * - %TIOCM_OUT1 OUT1 signal. * - %TIOCM_OUT2 OUT2 signal. * - %TIOCM_LOOP Set the port into loopback mode. * * If the appropriate bit is set, the signal should be driven * active. If the bit is clear, the signal should be driven * inactive. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @get_mctrl: ``unsigned int ()(struct uart_port *port)`` * * Returns the current state of modem control inputs of @port. The state * of the outputs should not be returned, since the core keeps track of * their state. The state information should include: * * - %TIOCM_CAR state of DCD signal * - %TIOCM_CTS state of CTS signal * - %TIOCM_DSR state of DSR signal * - %TIOCM_RI state of RI signal * * The bit is set if the signal is currently driven active. If * the port does not support CTS, DCD or DSR, the driver should * indicate that the signal is permanently active. If RI is * not available, the signal should not be indicated as active. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_tx: ``void ()(struct uart_port *port)`` * * Stop transmitting characters. This might be due to the CTS line * becoming inactive or the tty layer indicating we want to stop * transmission due to an %XOFF character. * * The driver should stop transmitting characters as soon as possible. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @start_tx: ``void ()(struct uart_port *port)`` * * Start transmitting characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @throttle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that input buffers for the line discipline are * close to full, and it should somehow signal that no more characters * should be sent to the serial port. * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @unthrottle() and termios modification by the * tty layer. * * @unthrottle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that characters can now be sent to the serial * port without fear of overrunning the input buffers of the line * disciplines. * * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @throttle() and termios modification by the * tty layer. * * @send_xchar: ``void ()(struct uart_port *port, char ch)`` * * Transmit a high priority character, even if the port is stopped. This * is used to implement XON/XOFF flow control and tcflow(). If the serial * driver does not implement this function, the tty core will append the * character to the circular buffer and then call start_tx() / stop_tx() * to flush the data out. * * Do not transmit if @ch == '\0' (%__DISABLED_CHAR). * * Locking: none. * Interrupts: caller dependent. * * @start_rx: ``void ()(struct uart_port *port)`` * * Start receiving characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @enable_ms: ``void ()(struct uart_port *port)`` * * Enable the modem status interrupts. * * This method may be called multiple times. Modem status interrupts * should be disabled when the @shutdown() method is called. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @break_ctl: ``void ()(struct uart_port *port, int ctl)`` * * Control the transmission of a break signal. If @ctl is nonzero, the * break signal should be transmitted. The signal should be terminated * when another call is made with a zero @ctl. * * Locking: caller holds tty_port->mutex * * @startup: ``int ()(struct uart_port *port)`` * * Grab any interrupt resources and initialise any low level driver state. * Enable the port for reception. It should not activate RTS nor DTR; * this will be done via a separate call to @set_mctrl(). * * This method will only be called when the port is initially opened. * * Locking: port_sem taken. * Interrupts: globally disabled. * * @shutdown: ``void ()(struct uart_port *port)`` * * Disable the @port, disable any break condition that may be in effect, * and free any interrupt resources. It should not disable RTS nor DTR; * this will have already been done via a separate call to @set_mctrl(). * * Drivers must not access @port->state once this call has completed. * * This method will only be called when there are no more users of this * @port. * * Locking: port_sem taken. * Interrupts: caller dependent. * * @flush_buffer: ``void ()(struct uart_port *port)`` * * Flush any write buffers, reset any DMA state and stop any ongoing DMA * transfers. * * This will be called whenever the @port->state->xmit circular buffer is * cleared. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @set_termios: ``void ()(struct uart_port *port, struct ktermios *new, * struct ktermios *old)`` * * Change the @port parameters, including word length, parity, stop bits. * Update @port->read_status_mask and @port->ignore_status_mask to * indicate the types of events we are interested in receiving. Relevant * ktermios::c_cflag bits are: * * - %CSIZE - word size * - %CSTOPB - 2 stop bits * - %PARENB - parity enable * - %PARODD - odd parity (when %PARENB is in force) * - %ADDRB - address bit (changed through uart_port::rs485_config()). * - %CREAD - enable reception of characters (if not set, still receive * characters from the port, but throw them away). * - %CRTSCTS - if set, enable CTS status change reporting. * - %CLOCAL - if not set, enable modem status change reporting. * * Relevant ktermios::c_iflag bits are: * * - %INPCK - enable frame and parity error events to be passed to the TTY * layer. * - %BRKINT / %PARMRK - both of these enable break events to be passed to * the TTY layer. * - %IGNPAR - ignore parity and framing errors. * - %IGNBRK - ignore break errors. If %IGNPAR is also set, ignore overrun * errors as well. * * The interaction of the ktermios::c_iflag bits is as follows (parity * error given as an example): * * ============ ======= ======= ========================================= * Parity error INPCK IGNPAR * ============ ======= ======= ========================================= * n/a 0 n/a character received, marked as %TTY_NORMAL * None 1 n/a character received, marked as %TTY_NORMAL * Yes 1 0 character received, marked as %TTY_PARITY * Yes 1 1 character discarded * ============ ======= ======= ========================================= * * Other flags may be used (eg, xon/xoff characters) if your hardware * supports hardware "soft" flow control. * * Locking: caller holds tty_port->mutex * Interrupts: caller dependent. * This call must not sleep * * @set_ldisc: ``void ()(struct uart_port *port, struct ktermios *termios)`` * * Notifier for discipline change. See * Documentation/driver-api/tty/tty_ldisc.rst. * * Locking: caller holds tty_port->mutex * * @pm: ``void ()(struct uart_port *port, unsigned int state, * unsigned int oldstate)`` * * Perform any power management related activities on the specified @port. * @state indicates the new state (defined by enum uart_pm_state), * @oldstate indicates the previous state. * * This function should not be used to grab any resources. * * This will be called when the @port is initially opened and finally * closed, except when the @port is also the system console. This will * occur even if %CONFIG_PM is not set. * * Locking: none. * Interrupts: caller dependent. * * @type: ``const char *()(struct uart_port *port)`` * * Return a pointer to a string constant describing the specified @port, * or return %NULL, in which case the string 'unknown' is substituted. * * Locking: none. * Interrupts: caller dependent. * * @release_port: ``void ()(struct uart_port *port)`` * * Release any memory and IO region resources currently in use by the * @port. * * Locking: none. * Interrupts: caller dependent. * * @request_port: ``int ()(struct uart_port *port)`` * * Request any memory and IO region resources required by the port. If any * fail, no resources should be registered when this function returns, and * it should return -%EBUSY on failure. * * Locking: none. * Interrupts: caller dependent. * * @config_port: ``void ()(struct uart_port *port, int type)`` * * Perform any autoconfiguration steps required for the @port. @type * contains a bit mask of the required configuration. %UART_CONFIG_TYPE * indicates that the port requires detection and identification. * @port->type should be set to the type found, or %PORT_UNKNOWN if no * port was detected. * * %UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, * which should be probed using standard kernel autoprobing techniques. * This is not necessary on platforms where ports have interrupts * internally hard wired (eg, system on a chip implementations). * * Locking: none. * Interrupts: caller dependent. * * @verify_port: ``int ()(struct uart_port *port, * struct serial_struct *serinfo)`` * * Verify the new serial port information contained within @serinfo is * suitable for this port type. * * Locking: none. * Interrupts: caller dependent. * * @ioctl: ``int ()(struct uart_port *port, unsigned int cmd, * unsigned long arg)`` * * Perform any port specific IOCTLs. IOCTL commands must be defined using * the standard numbering system found in <asm/ioctl.h>. * * Locking: none. * Interrupts: caller dependent. * * @poll_init: ``int ()(struct uart_port *port)`` * * Called by kgdb to perform the minimal hardware initialization needed to * support @poll_put_char() and @poll_get_char(). Unlike @startup(), this * should not request interrupts. * * Locking: %tty_mutex and tty_port->mutex taken. * Interrupts: n/a. * * @poll_put_char: ``void ()(struct uart_port *port, unsigned char ch)`` * * Called by kgdb to write a single character @ch directly to the serial * @port. It can and should block until there is space in the TX FIFO. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @poll_get_char: ``int ()(struct uart_port *port)`` * * Called by kgdb to read a single character directly from the serial * port. If data is available, it should be returned; otherwise the * function should return %NO_POLL_CHAR immediately. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*start_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); const char *(*type)(struct uart_port *); void (*release_port)(struct uart_port *); int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct ktermios *termios, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int ctrl_id; /* optional serial core controller id */ unsigned int port_id; /* optional serial core port id */ unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ #define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ #define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ #define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ #define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..ASYNCB_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #else #define UPF_FOURPORT 0 #endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) #define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) #define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) #define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) #define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) #define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) #define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) #define UPF_DEAD ((__force upf_t) BIT_ULL(30)) #define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) #define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. */ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) bool hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int frame_time; /* frame timing in ns */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* serial port physical parent device */ struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); } /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { spin_unlock_irqrestore(&up->lock, flags); } /** * uart_port_set_cons - Safely set the @cons field for a uart * @up: The uart port to set * @con: The new console to set to * * This function must be used to set @up->cons. It uses the port lock to * synchronize with the port lock wrappers in order to ensure that the console * cannot change or disappear while another context is holding the port lock. */ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) { unsigned long flags; __uart_port_lock_irqsave(up, &flags); up->cons = con; __uart_port_unlock_irqrestore(up, flags); } /* Only for internal port lock wrapper usage. */ static inline bool __uart_port_using_nbcon(struct uart_port *up) { lockdep_assert_held_once(&up->lock); if (likely(!uart_console(up))) return false; /* * @up->cons is only modified under the port lock. Therefore it is * certain that it cannot disappear here. * * @up->cons->node is added/removed from the console list under the * port lock. Therefore it is certain that the registration status * cannot change here, thus @up->cons->flags can be read directly. */ if (hlist_unhashed_lockless(&up->cons->node) || !(up->cons->flags & CON_NBCON) || !up->cons->write_atomic) { return false; } return true; } /* Only for internal port lock wrapper usage. */ static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return true; return nbcon_device_try_acquire(up->cons); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; while (!nbcon_device_try_acquire(up->cons)) cpu_relax(); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_release(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; nbcon_device_release(up->cons); } /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irq - Lock the UART port and disable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage */ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); __uart_port_nbcon_acquire(up); } /** * uart_port_trylock - Try to lock the UART port * @up: Pointer to UART port structure * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock(struct uart_port *up) { if (!spin_trylock(&up->lock)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock(&up->lock); return false; } return true; } /** * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { if (!spin_trylock_irqsave(&up->lock, *flags)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock_irqrestore(&up->lock, *flags); return false; } return true; } /** * uart_port_unlock - Unlock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_unlock(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock(&up->lock); } /** * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_unlock_irq(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } /** * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 /** * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars * @up: uart_port structure describing the port * @chars: number of characters sent * * This function advances the tail of circular xmit buffer by the number of * @chars transmitted and handles accounting of transmitted bytes (into * @up's icount.tx). */ static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) { struct tty_port *tport = &up->state->port; kfifo_skip_count(&tport->xmit_fifo, chars); up->icount.tx += chars; } static inline unsigned int uart_fifo_out(struct uart_port *up, unsigned char *buf, unsigned int chars) { struct tty_port *tport = &up->state->port; chars = kfifo_out(&tport->xmit_fifo, buf, chars); up->icount.tx += chars; return chars; } static inline unsigned int uart_fifo_get(struct uart_port *up, unsigned char *ch) { struct tty_port *tport = &up->state->port; unsigned int chars; chars = kfifo_get(&tport->xmit_fifo, ch); up->icount.tx += chars; return chars; } struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /** * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() * * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer */ enum UART_TX_FLAGS { UART_TX_NOSTOP = BIT(0), }; #define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct tty_port *__tport = &__port->state->port; \ unsigned int pending; \ \ for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ if (__port->x_char) { \ (ch) = __port->x_char; \ (put_char); \ __port->x_char = 0; \ continue; \ } \ \ if (uart_tx_stopped(__port)) \ break; \ \ if (!kfifo_get(&__tport->xmit_fifo, &(ch))) \ break; \ \ (put_char); \ } \ \ (tx_done); \ \ pending = kfifo_len(&__tport->xmit_fifo); \ if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ pending; \ }) /** * uart_port_tx_limited -- transmit helper for uart_port with count limiting * @port: uart port * @ch: variable to store a character to be written to the HW * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * This helper transmits characters from the xmit buffer to the hardware using * @put_char(). It does so until @count characters are sent and while @tx_ready * evaluates to true. * * Returns: the number of characters in the xmit buffer when done. * * The expression in macro parameters shall be designed as follows: * * **tx_ready:** should evaluate to true if the HW can accept more data to * be sent. This parameter can be %true, which means the HW is always ready. * * **put_char:** shall write @ch to the device of @port. * * **tx_done:** when the write loop is done, this can perform arbitrary * action before potential invocation of ops->stop_tx() happens. If the * driver does not need to do anything, use e.g. ({}). * * For all of them, @port->lock is held, interrupts are locally disabled and * the expressions must not sleep. */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx -- transmit helper for uart_port * @port: uart port * @ch: variable to store a character to be written to the HW * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) /** * uart_port_tx_flags -- transmit helper for uart_port with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* * Calculates FIFO drain time. */ static inline unsigned long uart_fifo_timeout(struct uart_port *port) { u64 fifo_timeout = (u64)READ_ONCE(port->frame_time) * port->fifosize; /* Add .02 seconds of slop */ fifo_timeout += 20 * NSEC_PER_MSEC; return max(nsecs_to_jiffies(fifo_timeout), 1UL); } /* Base timer interval for polling */ static inline unsigned long uart_poll_timeout(struct uart_port *port) { unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. */ struct earlycon_device { struct console *con; struct uart_port port; char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id __earlycon_table[]; extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define OF_EARLYCON_DECLARE(_name, compat, fn) \ static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif /* Variant of uart_console_registered() when the console_list_lock is held. */ static inline bool uart_console_registered_locked(struct uart_port *port) { return uart_console(port) && console_is_registered_locked(port->cons); } static inline bool uart_console_registered(struct uart_port *port) { return uart_console(port) && console_is_registered(port->cons); } struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, unsigned char)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_read_port_properties(struct uart_port *port); int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->flow.stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. */ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */
239 216 196 197 2 1 2 2 5 5 4 1 26 26 1 22 22 21 21 21 21 21 6 3 3 2 1 31 3 3 2 1 28 21 11 41 40 1 41 3 30 27 23 9 32 6 191 172 173 8 23 201 202 104 100 7 100 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 /* * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. (Rik) Faith <faith@valinux.com> * Author Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/slab.h> #include <drm/drm_auth.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_lease.h> #include <drm/drm_print.h> #include "drm_internal.h" /** * DOC: master and authentication * * &struct drm_master is used to track groups of clients with open * primary device nodes. For every &struct drm_file which has had at * least once successfully became the device master (either through the * SET_MASTER IOCTL, or implicitly through opening the primary device node when * no one else is the current master that time) there exists one &drm_master. * This is noted in &drm_file.is_master. All other clients have just a pointer * to the &drm_master they are associated with. * * In addition only one &drm_master can be the current master for a &drm_device. * It can be switched through the DROP_MASTER and SET_MASTER IOCTL, or * implicitly through closing/opening the primary device node. See also * drm_is_current_master(). * * Clients can authenticate against the current master (if it matches their own) * using the GETMAGIC and AUTHMAGIC IOCTLs. Together with exchanging masters, * this allows controlled access to the device for an entire group of mutually * trusted clients. */ static bool drm_is_current_master_locked(struct drm_file *fpriv) { lockdep_assert_once(lockdep_is_held(&fpriv->master_lookup_lock) || lockdep_is_held(&fpriv->minor->dev->master_mutex)); return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; } /** * drm_is_current_master - checks whether @priv is the current master * @fpriv: DRM file private * * Checks whether @fpriv is current master on its device. This decides whether a * client is allowed to run DRM_MASTER IOCTLs. * * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting * - the current master is assumed to own the non-shareable display hardware. */ bool drm_is_current_master(struct drm_file *fpriv) { bool ret; spin_lock(&fpriv->master_lookup_lock); ret = drm_is_current_master_locked(fpriv); spin_unlock(&fpriv->master_lookup_lock); return ret; } EXPORT_SYMBOL(drm_is_current_master); int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; int ret = 0; mutex_lock(&dev->master_mutex); if (!file_priv->magic) { ret = idr_alloc(&file_priv->master->magic_map, file_priv, 1, 0, GFP_KERNEL); if (ret >= 0) file_priv->magic = ret; } auth->magic = file_priv->magic; mutex_unlock(&dev->master_mutex); drm_dbg_core(dev, "%u\n", auth->magic); return ret < 0 ? ret : 0; } int drm_authmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; struct drm_file *file; drm_dbg_core(dev, "%u\n", auth->magic); mutex_lock(&dev->master_mutex); file = idr_find(&file_priv->master->magic_map, auth->magic); if (file) { file->authenticated = 1; idr_replace(&file_priv->master->magic_map, NULL, auth->magic); } mutex_unlock(&dev->master_mutex); return file ? 0 : -EINVAL; } struct drm_master *drm_master_create(struct drm_device *dev) { struct drm_master *master; master = kzalloc(sizeof(*master), GFP_KERNEL); if (!master) return NULL; kref_init(&master->refcount); idr_init_base(&master->magic_map, 1); master->dev = dev; /* initialize the tree of output resource lessees */ INIT_LIST_HEAD(&master->lessees); INIT_LIST_HEAD(&master->lessee_list); idr_init(&master->leases); idr_init_base(&master->lessee_idr, 1); return master; } static void drm_set_master(struct drm_device *dev, struct drm_file *fpriv, bool new_master) { dev->master = drm_master_get(fpriv->master); if (dev->driver->master_set) dev->driver->master_set(dev, fpriv, new_master); fpriv->was_master = true; } static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) { struct drm_master *old_master; struct drm_master *new_master; lockdep_assert_held_once(&dev->master_mutex); WARN_ON(fpriv->is_master); old_master = fpriv->master; new_master = drm_master_create(dev); if (!new_master) return -ENOMEM; spin_lock(&fpriv->master_lookup_lock); fpriv->master = new_master; spin_unlock(&fpriv->master_lookup_lock); fpriv->is_master = 1; fpriv->authenticated = 1; drm_set_master(dev, fpriv, true); if (old_master) drm_master_put(&old_master); return 0; } /* * In the olden days the SET/DROP_MASTER ioctls used to return EACCES when * CAP_SYS_ADMIN was not set. This was used to prevent rogue applications * from becoming master and/or failing to release it. * * At the same time, the first client (for a given VT) is _always_ master. * Thus in order for the ioctls to succeed, one had to _explicitly_ run the * application as root or flip the setuid bit. * * If the CAP_SYS_ADMIN was missing, no other client could become master... * EVER :-( Leading to a) the graphics session dying badly or b) a completely * locked session. * * * As some point systemd-logind was introduced to orchestrate and delegate * master as applicable. It does so by opening the fd and passing it to users * while in itself logind a) does the set/drop master per users' request and * b) * implicitly drops master on VT switch. * * Even though logind looks like the future, there are a few issues: * - some platforms don't have equivalent (Android, CrOS, some BSDs) so * root is required _solely_ for SET/DROP MASTER. * - applications may not be updated to use it, * - any client which fails to drop master* can DoS the application using * logind, to a varying degree. * * * Either due missing CAP_SYS_ADMIN or simply not calling DROP_MASTER. * * * Here we implement the next best thing: * - ensure the logind style of fd passing works unchanged, and * - allow a client to drop/set master, iff it is/was master at a given point * in time. * * Note: DROP_MASTER cannot be free for all, as an arbitrator user could: * - DoS/crash the arbitrator - details would be implementation specific * - open the node, become master implicitly and cause issues * * As a result this fixes the following when using root-less build w/o logind * - startx * - weston * - various compositors based on wlroots */ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) return -EACCES; return 0; } int drm_setmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; mutex_lock(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) goto out_unlock; if (drm_is_current_master_locked(file_priv)) goto out_unlock; if (dev->master) { ret = -EBUSY; goto out_unlock; } if (!file_priv->master) { ret = -EINVAL; goto out_unlock; } if (!file_priv->is_master) { ret = drm_new_set_master(dev, file_priv); goto out_unlock; } if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to set lessee %d as master\n", file_priv->master->lessee_id); ret = -EINVAL; goto out_unlock; } drm_set_master(dev, file_priv, false); out_unlock: mutex_unlock(&dev->master_mutex); return ret; } static void drm_drop_master(struct drm_device *dev, struct drm_file *fpriv) { if (dev->driver->master_drop) dev->driver->master_drop(dev, fpriv); drm_master_put(&dev->master); } int drm_dropmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; mutex_lock(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) goto out_unlock; if (!drm_is_current_master_locked(file_priv)) { ret = -EINVAL; goto out_unlock; } if (!dev->master) { ret = -EINVAL; goto out_unlock; } if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to drop lessee %d as master\n", file_priv->master->lessee_id); ret = -EINVAL; goto out_unlock; } drm_drop_master(dev, file_priv); out_unlock: mutex_unlock(&dev->master_mutex); return ret; } int drm_master_open(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; int ret = 0; /* if there is no current master make this fd it, but do not create * any master object for render clients */ mutex_lock(&dev->master_mutex); if (!dev->master) { ret = drm_new_set_master(dev, file_priv); } else { spin_lock(&file_priv->master_lookup_lock); file_priv->master = drm_master_get(dev->master); spin_unlock(&file_priv->master_lookup_lock); } mutex_unlock(&dev->master_mutex); return ret; } void drm_master_release(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; struct drm_master *master; mutex_lock(&dev->master_mutex); master = file_priv->master; if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic); if (!drm_is_current_master_locked(file_priv)) goto out; if (dev->master == file_priv->master) drm_drop_master(dev, file_priv); out: if (drm_core_check_feature(dev, DRIVER_MODESET) && file_priv->is_master) { /* Revoke any leases held by this or lessees, but only if * this is the "real" master */ drm_lease_revoke(master); } /* drop the master reference held by the file priv */ if (file_priv->master) drm_master_put(&file_priv->master); mutex_unlock(&dev->master_mutex); } /** * drm_master_get - reference a master pointer * @master: &struct drm_master * * Increments the reference count of @master and returns a pointer to @master. */ struct drm_master *drm_master_get(struct drm_master *master) { kref_get(&master->refcount); return master; } EXPORT_SYMBOL(drm_master_get); /** * drm_file_get_master - reference &drm_file.master of @file_priv * @file_priv: DRM file private * * Increments the reference count of @file_priv's &drm_file.master and returns * the &drm_file.master. If @file_priv has no &drm_file.master, returns NULL. * * Master pointers returned from this function should be unreferenced using * drm_master_put(). */ struct drm_master *drm_file_get_master(struct drm_file *file_priv) { struct drm_master *master = NULL; spin_lock(&file_priv->master_lookup_lock); if (!file_priv->master) goto unlock; master = drm_master_get(file_priv->master); unlock: spin_unlock(&file_priv->master_lookup_lock); return master; } EXPORT_SYMBOL(drm_file_get_master); static void drm_master_destroy(struct kref *kref) { struct drm_master *master = container_of(kref, struct drm_master, refcount); struct drm_device *dev = master->dev; if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_lease_destroy(master); idr_destroy(&master->magic_map); idr_destroy(&master->leases); idr_destroy(&master->lessee_idr); kfree(master->unique); kfree(master); } /** * drm_master_put - unreference and clear a master pointer * @master: pointer to a pointer of &struct drm_master * * This decrements the &drm_master behind @master and sets it to NULL. */ void drm_master_put(struct drm_master **master) { kref_put(&(*master)->refcount, drm_master_destroy); *master = NULL; } EXPORT_SYMBOL(drm_master_put); /* Used by drm_client and drm_fb_helper */ bool drm_master_internal_acquire(struct drm_device *dev) { mutex_lock(&dev->master_mutex); if (dev->master) { mutex_unlock(&dev->master_mutex); return false; } return true; } EXPORT_SYMBOL(drm_master_internal_acquire); /* Used by drm_client and drm_fb_helper */ void drm_master_internal_release(struct drm_device *dev) { mutex_unlock(&dev->master_mutex); } EXPORT_SYMBOL(drm_master_internal_release);
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Lexar "Jumpshot" Compact Flash reader * * jumpshot driver v0.1: * * First release * * Current development and maintenance by: * (c) 2000 Jimmie Mayfield (mayfield+usb@sackheads.org) * * Many thanks to Robert Baruch for the SanDisk SmartMedia reader driver * which I used as a template for this driver. * * Some bugfixes and scatter-gather code by Gregory P. Smith * (greg-usb@electricrain.com) * * Fix for media change by Joerg Schneider (js@joergschneider.com) * * Developed with the assistance of: * * (C) 2002 Alan Stern <stern@rowland.org> */ /* * This driver attempts to support the Lexar Jumpshot USB CompactFlash * reader. Like many other USB CompactFlash readers, the Jumpshot contains * a USB-to-ATA chip. * * This driver supports reading and writing. If you're truly paranoid, * however, you can force the driver into a write-protected state by setting * the WP enable bits in jumpshot_handle_mode_sense. See the comments * in that routine. */ #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-jumpshot" MODULE_DESCRIPTION("Driver for Lexar \"Jumpshot\" Compact Flash reader"); MODULE_AUTHOR("Jimmie Mayfield <mayfield+usb@sackheads.org>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id jumpshot_usb_ids[] = { # include "unusual_jumpshot.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, jumpshot_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev jumpshot_unusual_dev_list[] = { # include "unusual_jumpshot.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV struct jumpshot_info { unsigned long sectors; /* total sector count */ unsigned long ssize; /* sector size in bytes */ /* the following aren't used yet */ unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ }; static inline int jumpshot_bulk_read(struct us_data *us, unsigned char *data, unsigned int len) { if (len == 0) return USB_STOR_XFER_GOOD; usb_stor_dbg(us, "len = %d\n", len); return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, len, NULL); } static inline int jumpshot_bulk_write(struct us_data *us, unsigned char *data, unsigned int len) { if (len == 0) return USB_STOR_XFER_GOOD; usb_stor_dbg(us, "len = %d\n", len); return usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, data, len, NULL); } static int jumpshot_get_status(struct us_data *us) { int rc; if (!us) return USB_STOR_TRANSPORT_ERROR; // send the setup rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, 0, 0xA0, 0, 7, us->iobuf, 1); if (rc != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (us->iobuf[0] != 0x50) { usb_stor_dbg(us, "0x%2x\n", us->iobuf[0]); return USB_STOR_TRANSPORT_ERROR; } return USB_STOR_TRANSPORT_GOOD; } static int jumpshot_read_data(struct us_data *us, struct jumpshot_info *info, u32 sector, u32 sectors) { unsigned char *command = us->iobuf; unsigned char *buffer; unsigned char thistime; unsigned int totallen, alloclen; int len, result; unsigned int sg_offset = 0; struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. I don't know if Jumpshot // supports beyond 24-bit addressing. It's kind of hard to test // since it requires > 8GB CF card. if (sector > 0x0FFFFFFF) return USB_STOR_TRANSPORT_ERROR; totallen = sectors * info->ssize; // Since we don't read more than 64 KB at a time, we have to create // a bounce buffer and move the data a piece at a time between the // bounce buffer and the actual transfer buffer. alloclen = min(totallen, 65536u); buffer = kmalloc(alloclen, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; do { // loop, never allocate or transfer more than 64k at once // (min(128k, 255*info->ssize) is the real limit) len = min(totallen, alloclen); thistime = (len / info->ssize) & 0xff; command[0] = 0; command[1] = thistime; command[2] = sector & 0xFF; command[3] = (sector >> 8) & 0xFF; command[4] = (sector >> 16) & 0xFF; command[5] = 0xE0 | ((sector >> 24) & 0x0F); command[6] = 0x20; // send the setup + command result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, 0, 0x20, 0, 1, command, 7); if (result != USB_STOR_XFER_GOOD) goto leave; // read the result result = jumpshot_bulk_read(us, buffer, len); if (result != USB_STOR_XFER_GOOD) goto leave; usb_stor_dbg(us, "%d bytes\n", len); // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &sg_offset, TO_XFER_BUF); sector += thistime; totallen -= len; } while (totallen > 0); kfree(buffer); return USB_STOR_TRANSPORT_GOOD; leave: kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } static int jumpshot_write_data(struct us_data *us, struct jumpshot_info *info, u32 sector, u32 sectors) { unsigned char *command = us->iobuf; unsigned char *buffer; unsigned char thistime; unsigned int totallen, alloclen; int len, result, waitcount; unsigned int sg_offset = 0; struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. I don't know if Jumpshot // supports beyond 24-bit addressing. It's kind of hard to test // since it requires > 8GB CF card. // if (sector > 0x0FFFFFFF) return USB_STOR_TRANSPORT_ERROR; totallen = sectors * info->ssize; // Since we don't write more than 64 KB at a time, we have to create // a bounce buffer and move the data a piece at a time between the // bounce buffer and the actual transfer buffer. alloclen = min(totallen, 65536u); buffer = kmalloc(alloclen, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; do { // loop, never allocate or transfer more than 64k at once // (min(128k, 255*info->ssize) is the real limit) len = min(totallen, alloclen); thistime = (len / info->ssize) & 0xff; // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &sg_offset, FROM_XFER_BUF); command[0] = 0; command[1] = thistime; command[2] = sector & 0xFF; command[3] = (sector >> 8) & 0xFF; command[4] = (sector >> 16) & 0xFF; command[5] = 0xE0 | ((sector >> 24) & 0x0F); command[6] = 0x30; // send the setup + command result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, 0, 0x20, 0, 1, command, 7); if (result != USB_STOR_XFER_GOOD) goto leave; // send the data result = jumpshot_bulk_write(us, buffer, len); if (result != USB_STOR_XFER_GOOD) goto leave; // read the result. apparently the bulk write can complete // before the jumpshot drive is finished writing. so we loop // here until we get a good return code waitcount = 0; do { result = jumpshot_get_status(us); if (result != USB_STOR_TRANSPORT_GOOD) { // I have not experimented to find the smallest value. // msleep(50); } } while ((result != USB_STOR_TRANSPORT_GOOD) && (waitcount < 10)); if (result != USB_STOR_TRANSPORT_GOOD) usb_stor_dbg(us, "Gah! Waitcount = 10. Bad write!?\n"); sector += thistime; totallen -= len; } while (totallen > 0); kfree(buffer); return result; leave: kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } static int jumpshot_id_device(struct us_data *us, struct jumpshot_info *info) { unsigned char *command = us->iobuf; unsigned char *reply; int rc; if (!info) return USB_STOR_TRANSPORT_ERROR; command[0] = 0xE0; command[1] = 0xEC; reply = kmalloc(512, GFP_NOIO); if (!reply) return USB_STOR_TRANSPORT_ERROR; // send the setup rc = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, 0, 0x20, 0, 6, command, 2); if (rc != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Gah! send_control for read_capacity failed\n"); rc = USB_STOR_TRANSPORT_ERROR; goto leave; } // read the reply rc = jumpshot_bulk_read(us, reply, 512); if (rc != USB_STOR_XFER_GOOD) { rc = USB_STOR_TRANSPORT_ERROR; goto leave; } info->sectors = ((u32)(reply[117]) << 24) | ((u32)(reply[116]) << 16) | ((u32)(reply[115]) << 8) | ((u32)(reply[114]) ); rc = USB_STOR_TRANSPORT_GOOD; leave: kfree(reply); return rc; } static int jumpshot_handle_mode_sense(struct us_data *us, struct scsi_cmnd * srb, int sense_6) { static unsigned char rw_err_page[12] = { 0x1, 0xA, 0x21, 1, 0, 0, 0, 0, 1, 0, 0, 0 }; static unsigned char cache_page[12] = { 0x8, 0xA, 0x1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static unsigned char rbac_page[12] = { 0x1B, 0xA, 0, 0x81, 0, 0, 0, 0, 0, 0, 0, 0 }; static unsigned char timer_page[8] = { 0x1C, 0x6, 0, 0, 0, 0 }; unsigned char pc, page_code; unsigned int i = 0; struct jumpshot_info *info = (struct jumpshot_info *) (us->extra); unsigned char *ptr = us->iobuf; pc = srb->cmnd[2] >> 6; page_code = srb->cmnd[2] & 0x3F; switch (pc) { case 0x0: usb_stor_dbg(us, "Current values\n"); break; case 0x1: usb_stor_dbg(us, "Changeable values\n"); break; case 0x2: usb_stor_dbg(us, "Default values\n"); break; case 0x3: usb_stor_dbg(us, "Saves values\n"); break; } memset(ptr, 0, 8); if (sense_6) { ptr[2] = 0x00; // WP enable: 0x80 i = 4; } else { ptr[3] = 0x00; // WP enable: 0x80 i = 8; } switch (page_code) { case 0x0: // vendor-specific mode info->sense_key = 0x05; info->sense_asc = 0x24; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; case 0x1: memcpy(ptr + i, rw_err_page, sizeof(rw_err_page)); i += sizeof(rw_err_page); break; case 0x8: memcpy(ptr + i, cache_page, sizeof(cache_page)); i += sizeof(cache_page); break; case 0x1B: memcpy(ptr + i, rbac_page, sizeof(rbac_page)); i += sizeof(rbac_page); break; case 0x1C: memcpy(ptr + i, timer_page, sizeof(timer_page)); i += sizeof(timer_page); break; case 0x3F: memcpy(ptr + i, timer_page, sizeof(timer_page)); i += sizeof(timer_page); memcpy(ptr + i, rbac_page, sizeof(rbac_page)); i += sizeof(rbac_page); memcpy(ptr + i, cache_page, sizeof(cache_page)); i += sizeof(cache_page); memcpy(ptr + i, rw_err_page, sizeof(rw_err_page)); i += sizeof(rw_err_page); break; } if (sense_6) ptr[0] = i - 1; else ((__be16 *) ptr)[0] = cpu_to_be16(i - 2); usb_stor_set_xfer_buf(ptr, i, srb); return USB_STOR_TRANSPORT_GOOD; } static void jumpshot_info_destructor(void *extra) { // this routine is a placeholder... // currently, we don't allocate any extra blocks so we're okay } // Transport for the Lexar 'Jumpshot' // static int jumpshot_transport(struct scsi_cmnd *srb, struct us_data *us) { struct jumpshot_info *info; int rc; unsigned long block, blocks; unsigned char *ptr = us->iobuf; static unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; if (!us->extra) { us->extra = kzalloc(sizeof(struct jumpshot_info), GFP_NOIO); if (!us->extra) return USB_STOR_TRANSPORT_ERROR; us->extra_destructor = jumpshot_info_destructor; } info = (struct jumpshot_info *) (us->extra); if (srb->cmnd[0] == INQUIRY) { usb_stor_dbg(us, "INQUIRY - Returning bogus response\n"); memcpy(ptr, inquiry_response, sizeof(inquiry_response)); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_CAPACITY) { info->ssize = 0x200; // hard coded 512 byte sectors as per ATA spec rc = jumpshot_get_status(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; rc = jumpshot_id_device(us, info); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; usb_stor_dbg(us, "READ_CAPACITY: %ld sectors, %ld bytes per sector\n", info->sectors, info->ssize); // build the reply // ((__be32 *) ptr)[0] = cpu_to_be32(info->sectors - 1); ((__be32 *) ptr)[1] = cpu_to_be32(info->ssize); usb_stor_set_xfer_buf(ptr, 8, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == MODE_SELECT_10) { usb_stor_dbg(us, "Gah! MODE_SELECT_10\n"); return USB_STOR_TRANSPORT_ERROR; } if (srb->cmnd[0] == READ_10) { block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[7]) << 8) | ((u32)(srb->cmnd[8])); usb_stor_dbg(us, "READ_10: read block 0x%04lx count %ld\n", block, blocks); return jumpshot_read_data(us, info, block, blocks); } if (srb->cmnd[0] == READ_12) { // I don't think we'll ever see a READ_12 but support it anyway... // block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[6]) << 24) | ((u32)(srb->cmnd[7]) << 16) | ((u32)(srb->cmnd[8]) << 8) | ((u32)(srb->cmnd[9])); usb_stor_dbg(us, "READ_12: read block 0x%04lx count %ld\n", block, blocks); return jumpshot_read_data(us, info, block, blocks); } if (srb->cmnd[0] == WRITE_10) { block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[7]) << 8) | ((u32)(srb->cmnd[8])); usb_stor_dbg(us, "WRITE_10: write block 0x%04lx count %ld\n", block, blocks); return jumpshot_write_data(us, info, block, blocks); } if (srb->cmnd[0] == WRITE_12) { // I don't think we'll ever see a WRITE_12 but support it anyway... // block = ((u32)(srb->cmnd[2]) << 24) | ((u32)(srb->cmnd[3]) << 16) | ((u32)(srb->cmnd[4]) << 8) | ((u32)(srb->cmnd[5])); blocks = ((u32)(srb->cmnd[6]) << 24) | ((u32)(srb->cmnd[7]) << 16) | ((u32)(srb->cmnd[8]) << 8) | ((u32)(srb->cmnd[9])); usb_stor_dbg(us, "WRITE_12: write block 0x%04lx count %ld\n", block, blocks); return jumpshot_write_data(us, info, block, blocks); } if (srb->cmnd[0] == TEST_UNIT_READY) { usb_stor_dbg(us, "TEST_UNIT_READY\n"); return jumpshot_get_status(us); } if (srb->cmnd[0] == REQUEST_SENSE) { usb_stor_dbg(us, "REQUEST_SENSE\n"); memset(ptr, 0, 18); ptr[0] = 0xF0; ptr[2] = info->sense_key; ptr[7] = 11; ptr[12] = info->sense_asc; ptr[13] = info->sense_ascq; usb_stor_set_xfer_buf(ptr, 18, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == MODE_SENSE) { usb_stor_dbg(us, "MODE_SENSE_6 detected\n"); return jumpshot_handle_mode_sense(us, srb, 1); } if (srb->cmnd[0] == MODE_SENSE_10) { usb_stor_dbg(us, "MODE_SENSE_10 detected\n"); return jumpshot_handle_mode_sense(us, srb, 0); } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { /* * sure. whatever. not like we can stop the user from popping * the media out of the device (no locking doors, etc) */ return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == START_STOP) { /* * this is used by sd.c'check_scsidisk_media_change to detect * media change */ usb_stor_dbg(us, "START_STOP\n"); /* * the first jumpshot_id_device after a media change returns * an error (determined experimentally) */ rc = jumpshot_id_device(us, info); if (rc == USB_STOR_TRANSPORT_GOOD) { info->sense_key = NO_SENSE; srb->result = SUCCESS; } else { info->sense_key = UNIT_ATTENTION; srb->result = SAM_STAT_CHECK_CONDITION; } return rc; } usb_stor_dbg(us, "Gah! Unknown command: %d (0x%x)\n", srb->cmnd[0], srb->cmnd[0]); info->sense_key = 0x05; info->sense_asc = 0x20; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } static struct scsi_host_template jumpshot_host_template; static int jumpshot_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - jumpshot_usb_ids) + jumpshot_unusual_dev_list, &jumpshot_host_template); if (result) return result; us->transport_name = "Lexar Jumpshot Control/Bulk"; us->transport = jumpshot_transport; us->transport_reset = usb_stor_Bulk_reset; us->max_lun = 1; result = usb_stor_probe2(us); return result; } static struct usb_driver jumpshot_driver = { .name = DRV_NAME, .probe = jumpshot_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = jumpshot_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(jumpshot_driver, jumpshot_host_template, DRV_NAME);
55 55 59 59 59 59 59 54 54 53 59 59 59 59 59 54 59 59 59 54 58 59 1 59 59 59 59 59 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 // SPDX-License-Identifier: GPL-2.0 /* * fs/mpage.c * * Copyright (C) 2002, Linus Torvalds. * * Contains functions related to preparing and submitting BIOs which contain * multiple pagecache pages. * * 15May2002 Andrew Morton * Initial version * 27Jun2002 axboe@suse.de * use bio_add_page() to build bio's just the right size */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/kdev_t.h> #include <linux/gfp.h> #include <linux/bio.h> #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/highmem.h> #include <linux/prefetch.h> #include <linux/mpage.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> #include "internal.h" /* * I/O completion handler for multipage BIOs. * * The mpage code never puts partial pages into a BIO (except for end-of-file). * If a page does not map to a contiguous run of blocks then it simply falls * back to block_read_full_folio(). * * Why is this? If a page's completion depends on a number of different BIOs * which can complete in any order (or at the same time) then determining the * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ static void mpage_read_end_io(struct bio *bio) { struct folio_iter fi; int err = blk_status_to_errno(bio->bi_status); bio_for_each_folio_all(fi, bio) folio_end_read(fi.folio, err == 0); bio_put(bio); } static void mpage_write_end_io(struct bio *bio) { struct folio_iter fi; int err = blk_status_to_errno(bio->bi_status); bio_for_each_folio_all(fi, bio) { if (err) mapping_set_error(fi.folio->mapping, err); folio_end_writeback(fi.folio); } bio_put(bio); } static struct bio *mpage_bio_submit_read(struct bio *bio) { bio->bi_end_io = mpage_read_end_io; guard_bio_eod(bio); submit_bio(bio); return NULL; } static struct bio *mpage_bio_submit_write(struct bio *bio) { bio->bi_end_io = mpage_write_end_io; guard_bio_eod(bio); submit_bio(bio); return NULL; } /* * support function for mpage_readahead. The fs supplied get_block might * return an up to date buffer. This is used to map that buffer into * the page, which allows read_folio to avoid triggering a duplicate call * to get_block. * * The idea is to avoid adding buffers to pages that don't already have * them. So when the buffer is up to date and the page size == block size, * this marks the page up to date instead of adding new buffers. */ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh, int page_block) { struct inode *inode = folio->mapping->host; struct buffer_head *page_bh, *head; int block = 0; head = folio_buffers(folio); if (!head) { /* * don't make any buffers if there is only one buffer on * the folio and the folio just needs to be set up to date */ if (inode->i_blkbits == PAGE_SHIFT && buffer_uptodate(bh)) { folio_mark_uptodate(folio); return; } head = create_empty_buffers(folio, i_blocksize(inode), 0); } page_bh = head; do { if (block == page_block) { page_bh->b_state = bh->b_state; page_bh->b_bdev = bh->b_bdev; page_bh->b_blocknr = bh->b_blocknr; break; } page_bh = page_bh->b_this_page; block++; } while (page_bh != head); } struct mpage_readpage_args { struct bio *bio; struct folio *folio; unsigned int nr_pages; bool is_readahead; sector_t last_block_in_bio; struct buffer_head map_bh; unsigned long first_logical_block; get_block_t *get_block; }; /* * This is the worker routine which does all the work of mapping the disk * blocks and constructs largest possible bios, submits them for IO if the * blocks are not contiguous on the disk. * * We pass a buffer_head back and forth and use its buffer_mapped() flag to * represent the validity of its disk mapping and to decide when to do the next * get_block() call. */ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) { struct folio *folio = args->folio; struct inode *inode = folio->mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; struct buffer_head *map_bh = &args->map_bh; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t first_block; unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; int length; int fully_mapped = 1; blk_opf_t opf = REQ_OP_READ; unsigned nblocks; unsigned relative_block; gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); /* MAX_BUF_PER_PAGE, for example */ VM_BUG_ON_FOLIO(folio_test_large(folio), folio); if (args->is_readahead) { opf |= REQ_RAHEAD; gfp |= __GFP_NORETRY | __GFP_NOWARN; } if (folio_buffers(folio)) goto confused; block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + args->nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; /* * Map blocks using the result from the previous get_blocks call first. */ nblocks = map_bh->b_size >> blkbits; if (buffer_mapped(map_bh) && block_in_file > args->first_logical_block && block_in_file < (args->first_logical_block + nblocks)) { unsigned map_offset = block_in_file - args->first_logical_block; unsigned last = nblocks - map_offset; first_block = map_bh->b_blocknr + map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { clear_buffer_mapped(map_bh); break; } if (page_block == blocks_per_page) break; page_block++; block_in_file++; } bdev = map_bh->b_bdev; } /* * Then do more get_blocks calls until we are done with this folio. */ map_bh->b_folio = folio; while (page_block < blocks_per_page) { map_bh->b_state = 0; map_bh->b_size = 0; if (block_in_file < last_block) { map_bh->b_size = (last_block-block_in_file) << blkbits; if (args->get_block(inode, block_in_file, map_bh, 0)) goto confused; args->first_logical_block = block_in_file; } if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; page_block++; block_in_file++; continue; } /* some filesystems will copy data into the page during * the get_block call, in which case we don't want to * read it again. map_buffer_to_folio copies the data * we just collected from get_block into the folio's buffers * so read_folio doesn't have to repeat the get_block call */ if (buffer_uptodate(map_bh)) { map_buffer_to_folio(folio, map_bh, page_block); goto confused; } if (first_hole != blocks_per_page) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ if (!page_block) first_block = map_bh->b_blocknr; else if (first_block + page_block != map_bh->b_blocknr) goto confused; nblocks = map_bh->b_size >> blkbits; for (relative_block = 0; ; relative_block++) { if (relative_block == nblocks) { clear_buffer_mapped(map_bh); break; } else if (page_block == blocks_per_page) break; page_block++; block_in_file++; } bdev = map_bh->b_bdev; } if (first_hole != blocks_per_page) { folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE); if (first_hole == 0) { folio_mark_uptodate(folio); folio_unlock(folio); goto out; } } else if (fully_mapped) { folio_set_mappedtodisk(folio); } /* * This folio will go to BIO. Do we need to send this BIO off first? */ if (args->bio && (args->last_block_in_bio != first_block - 1)) args->bio = mpage_bio_submit_read(args->bio); alloc_new: if (args->bio == NULL) { args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf, gfp); if (args->bio == NULL) goto confused; args->bio->bi_iter.bi_sector = first_block << (blkbits - 9); } length = first_hole << blkbits; if (!bio_add_folio(args->bio, folio, length, 0)) { args->bio = mpage_bio_submit_read(args->bio); goto alloc_new; } relative_block = block_in_file - args->first_logical_block; nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_page)) args->bio = mpage_bio_submit_read(args->bio); else args->last_block_in_bio = first_block + blocks_per_page - 1; out: return args->bio; confused: if (args->bio) args->bio = mpage_bio_submit_read(args->bio); if (!folio_test_uptodate(folio)) block_read_full_folio(folio, args->get_block); else folio_unlock(folio); goto out; } /** * mpage_readahead - start reads against pages * @rac: Describes which pages to read. * @get_block: The filesystem's block mapper function. * * This function walks the pages and the blocks within each page, building and * emitting large BIOs. * * If anything unusual happens, such as: * * - encountering a page which has buffers * - encountering a page which has a non-hole after a hole * - encountering a page with non-contiguous blocks * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: * the end-of-file on blocksize < PAGE_SIZE setups. * * BH_Boundary explanation: * * There is a problem. The mpage read code assembles several pages, gets all * their disk mappings, and then submits them all. That's fine, but obtaining * the disk mappings may require I/O. Reads of indirect blocks, for example. * * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be * submitted in the following order: * * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 * * because the indirect block has to be read to get the mappings of blocks * 13,14,15,16. Obviously, this impacts performance. * * So what we do it to allow the filesystem's get_block() function to set * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block * after this one will require I/O against a block which is probably close to * this one. So you should push what I/O you have currently accumulated. * * This all causes the disk requests to be issued in the correct order. */ void mpage_readahead(struct readahead_control *rac, get_block_t get_block) { struct folio *folio; struct mpage_readpage_args args = { .get_block = get_block, .is_readahead = true, }; while ((folio = readahead_folio(rac))) { prefetchw(&folio->flags); args.folio = folio; args.nr_pages = readahead_count(rac); args.bio = do_mpage_readpage(&args); } if (args.bio) mpage_bio_submit_read(args.bio); } EXPORT_SYMBOL(mpage_readahead); /* * This isn't called much at all */ int mpage_read_folio(struct folio *folio, get_block_t get_block) { struct mpage_readpage_args args = { .folio = folio, .nr_pages = 1, .get_block = get_block, }; args.bio = do_mpage_readpage(&args); if (args.bio) mpage_bio_submit_read(args.bio); return 0; } EXPORT_SYMBOL(mpage_read_folio); /* * Writing is not so simple. * * If the page has buffers then they will be used for obtaining the disk * mapping. We only support pages which are fully mapped-and-dirty, with a * special case for pages which are unmapped at the end: end-of-file. * * If the page has no buffers (preferred) then the page is mapped here. * * If all blocks are found to be contiguous then the page can go into the * BIO. Otherwise fall back to the mapping's writepage(). * * FIXME: This code wants an estimate of how many pages are still to be * written, so it can intelligently allocate a suitably-sized BIO. For now, * just allocate full-size (16-page) BIOs. */ struct mpage_data { struct bio *bio; sector_t last_block_in_bio; get_block_t *get_block; }; /* * We have our BIO, so we can now mark the buffers clean. Make * sure to only clean buffers which we know we'll be writing. */ static void clean_buffers(struct folio *folio, unsigned first_unmapped) { unsigned buffer_counter = 0; struct buffer_head *bh, *head = folio_buffers(folio); if (!head) return; bh = head; do { if (buffer_counter++ == first_unmapped) break; clear_buffer_dirty(bh); bh = bh->b_this_page; } while (bh != head); /* * we cannot drop the bh if the page is not uptodate or a concurrent * read_folio would fail to serialize with the bh and it would read from * disk before we reach the platter. */ if (buffer_heads_over_limit && folio_test_uptodate(folio)) try_to_free_buffers(folio); } static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, void *data) { struct mpage_data *mpd = data; struct bio *bio = mpd->bio; struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; sector_t last_block; sector_t block_in_file; sector_t first_block; unsigned page_block; unsigned first_unmapped = blocks_per_page; struct block_device *bdev = NULL; int boundary = 0; sector_t boundary_block = 0; struct block_device *boundary_bdev = NULL; size_t length; struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; struct buffer_head *head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; /* If they're all mapped and dirty, do it */ page_block = 0; do { BUG_ON(buffer_locked(bh)); if (!buffer_mapped(bh)) { /* * unmapped dirty buffers are created by * block_dirty_folio -> mmapped data */ if (buffer_dirty(bh)) goto confused; if (first_unmapped == blocks_per_page) first_unmapped = page_block; continue; } if (first_unmapped != blocks_per_page) goto confused; /* hole -> non-hole */ if (!buffer_dirty(bh) || !buffer_uptodate(bh)) goto confused; if (page_block) { if (bh->b_blocknr != first_block + page_block) goto confused; } else { first_block = bh->b_blocknr; } page_block++; boundary = buffer_boundary(bh); if (boundary) { boundary_block = bh->b_blocknr; boundary_bdev = bh->b_bdev; } bdev = bh->b_bdev; } while ((bh = bh->b_this_page) != head); if (first_unmapped) goto page_is_mapped; /* * Page has buffers, but they are all unmapped. The page was * created by pagein or read over a hole which was handled by * block_read_full_folio(). If this address_space is also * using mpage_readahead then this can rarely happen. */ goto confused; } /* * The page has no buffers: map it to disk */ BUG_ON(!folio_test_uptodate(folio)); block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits); /* * Whole page beyond EOF? Skip allocating blocks to avoid leaking * space. */ if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits) goto page_is_mapped; last_block = (i_size - 1) >> blkbits; map_bh.b_folio = folio; for (page_block = 0; page_block < blocks_per_page; ) { map_bh.b_state = 0; map_bh.b_size = 1 << blkbits; if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (!buffer_mapped(&map_bh)) goto confused; if (buffer_new(&map_bh)) clean_bdev_bh_alias(&map_bh); if (buffer_boundary(&map_bh)) { boundary_block = map_bh.b_blocknr; boundary_bdev = map_bh.b_bdev; } if (page_block) { if (map_bh.b_blocknr != first_block + page_block) goto confused; } else { first_block = map_bh.b_blocknr; } page_block++; boundary = buffer_boundary(&map_bh); bdev = map_bh.b_bdev; if (block_in_file == last_block) break; block_in_file++; } BUG_ON(page_block == 0); first_unmapped = page_block; page_is_mapped: /* Don't bother writing beyond EOF, truncate will discard the folio */ if (folio_pos(folio) >= i_size) goto confused; length = folio_size(folio); if (folio_pos(folio) + length > i_size) { /* * The page straddles i_size. It must be zeroed out on each * and every writepage invocation because it may be mmapped. * "A file is mapped in multiples of the page size. For a file * that is not a multiple of the page size, the remaining memory * is zeroed when mapped, and writes to that region are not * written out to the file." */ length = i_size - folio_pos(folio); folio_zero_segment(folio, length, folio_size(folio)); } /* * This page will go to BIO. Do we need to send this BIO off first? */ if (bio && mpd->last_block_in_bio != first_block - 1) bio = mpage_bio_submit_write(bio); alloc_new: if (bio == NULL) { bio = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE | wbc_to_write_flags(wbc), GFP_NOFS); bio->bi_iter.bi_sector = first_block << (blkbits - 9); wbc_init_bio(wbc, bio); bio->bi_write_hint = inode->i_write_hint; } /* * Must try to add the page before marking the buffer clean or * the confused fail path above (OOM) will be very confused when * it finds all bh marked clean (i.e. it will not write anything) */ wbc_account_cgroup_owner(wbc, folio, folio_size(folio)); length = first_unmapped << blkbits; if (!bio_add_folio(bio, folio, length, 0)) { bio = mpage_bio_submit_write(bio); goto alloc_new; } clean_buffers(folio, first_unmapped); BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); folio_unlock(folio); if (boundary || (first_unmapped != blocks_per_page)) { bio = mpage_bio_submit_write(bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); } } else { mpd->last_block_in_bio = first_block + blocks_per_page - 1; } goto out; confused: if (bio) bio = mpage_bio_submit_write(bio); /* * The caller has a ref on the inode, so *mapping is stable */ ret = block_write_full_folio(folio, wbc, mpd->get_block); mapping_set_error(mapping, ret); out: mpd->bio = bio; return ret; } /** * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. * * This is a library function, which implements the writepages() * address_space_operation. */ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { struct mpage_data mpd = { .get_block = get_block, }; struct blk_plug plug; int ret; blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(mpage_writepages);
3019 2943 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0 /* * All the USB notify logic * * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de> * * notifier functions originally based on those in kernel/sys.c * but fixed up to not be so broken. * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/notifier.h> #include <linux/usb.h> #include <linux/mutex.h> #include "usb.h" static BLOCKING_NOTIFIER_HEAD(usb_notifier_list); /** * usb_register_notify - register a notifier callback whenever a usb change happens * @nb: pointer to the notifier block for the callback events. * * These changes are either USB devices or busses being added or removed. */ void usb_register_notify(struct notifier_block *nb) { blocking_notifier_chain_register(&usb_notifier_list, nb); } EXPORT_SYMBOL_GPL(usb_register_notify); /** * usb_unregister_notify - unregister a notifier callback * @nb: pointer to the notifier block for the callback events. * * usb_register_notify() must have been previously called for this function * to work properly. */ void usb_unregister_notify(struct notifier_block *nb) { blocking_notifier_chain_unregister(&usb_notifier_list, nb); } EXPORT_SYMBOL_GPL(usb_unregister_notify); void usb_notify_add_device(struct usb_device *udev) { blocking_notifier_call_chain(&usb_notifier_list, USB_DEVICE_ADD, udev); } void usb_notify_remove_device(struct usb_device *udev) { blocking_notifier_call_chain(&usb_notifier_list, USB_DEVICE_REMOVE, udev); } void usb_notify_add_bus(struct usb_bus *ubus) { blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_ADD, ubus); } void usb_notify_remove_bus(struct usb_bus *ubus) { blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_REMOVE, ubus); }
1456 1368 155 77 1457 56 2 3 5 6 2 12 2 1 85 171 172 50 134 134 48 7 6 4 3 8 8 7 1 204 139 139 71 99 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 // SPDX-License-Identifier: GPL-2.0-or-later /* * Generic parts * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/init.h> #include <linux/llc.h> #include <net/llc.h> #include <net/stp.h> #include <net/switchdev.h> #include "br_private.h" /* * Handle changes in state of network devices enslaved to a bridge. * * Note: don't care about up/down if bridge itself is down, because * port state is checked when bridge is brought up. */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct netdev_notifier_pre_changeaddr_info *prechaddr_info; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool notified = false; bool changed_addr; int err; if (netif_is_bridge_master(dev)) { err = br_vlan_bridge_event(dev, event, ptr); if (err) return notifier_from_errno(err); if (event == NETDEV_REGISTER) { /* register of bridge completed, add sysfs entries */ err = br_sysfs_addbr(dev); if (err) return notifier_from_errno(err); return NOTIFY_DONE; } } /* not a port of a bridge */ p = br_port_get_rtnl(dev); if (!p) return NOTIFY_DONE; br = p->br; switch (event) { case NETDEV_CHANGEMTU: br_mtu_auto_adjust(br); break; case NETDEV_PRE_CHANGEADDR: if (br->dev->addr_assign_type == NET_ADDR_SET) break; prechaddr_info = ptr; err = dev_pre_changeaddr_notify(br->dev, prechaddr_info->dev_addr, extack); if (err) return notifier_from_errno(err); break; case NETDEV_CHANGEADDR: spin_lock_bh(&br->lock); br_fdb_changeaddr(p, dev->dev_addr); changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); if (changed_addr) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); break; case NETDEV_CHANGE: br_port_carrier_check(p, &notified); break; case NETDEV_FEAT_CHANGE: netdev_update_features(br->dev); break; case NETDEV_DOWN: spin_lock_bh(&br->lock); if (br->dev->flags & IFF_UP) { br_stp_disable_port(p); notified = true; } spin_unlock_bh(&br->lock); break; case NETDEV_UP: if (netif_running(br->dev) && netif_oper_up(dev)) { spin_lock_bh(&br->lock); br_stp_enable_port(p); notified = true; spin_unlock_bh(&br->lock); } break; case NETDEV_UNREGISTER: br_del_if(br, dev); break; case NETDEV_CHANGENAME: err = br_sysfs_renameif(p); if (err) return notifier_from_errno(err); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlying device to change its type. */ return NOTIFY_BAD; case NETDEV_RESEND_IGMP: /* Propagate to master device */ call_netdevice_notifiers(event, br->dev); break; } if (event != NETDEV_UNREGISTER) br_vlan_port_event(p, event); /* Events that may cause spanning tree to refresh */ if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || event == NETDEV_CHANGE || event == NETDEV_DOWN)) br_ifinfo_notify(RTM_NEWLINK, NULL, p); return NOTIFY_DONE; } static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; /* called with RTNL or RCU */ static int br_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; p = br_port_get_rtnl_rcu(dev); if (!p) goto out; br = p->br; switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid, fdb_info->locked, false); if (err) { err = notifier_from_errno(err); break; } br_fdb_offloaded_set(br, p, fdb_info->addr, fdb_info->vid, fdb_info->offloaded); break; case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid, false); if (err) err = notifier_from_errno(err); break; case SWITCHDEV_FDB_OFFLOADED: fdb_info = ptr; br_fdb_offloaded_set(br, p, fdb_info->addr, fdb_info->vid, fdb_info->offloaded); break; case SWITCHDEV_FDB_FLUSH_TO_BRIDGE: fdb_info = ptr; /* Don't delete static entries */ br_fdb_delete_by_port(br, p, fdb_info->vid, 0); break; } out: return err; } static struct notifier_block br_switchdev_notifier = { .notifier_call = br_switchdev_event, }; /* called under rtnl_mutex */ static int br_switchdev_blocking_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct switchdev_notifier_brport_info *brport_info; const struct switchdev_brport *b; struct net_bridge_port *p; int err = NOTIFY_DONE; p = br_port_get_rtnl(dev); if (!p) goto out; switch (event) { case SWITCHDEV_BRPORT_OFFLOADED: brport_info = ptr; b = &brport_info->brport; err = br_switchdev_port_offload(p, b->dev, b->ctx, b->atomic_nb, b->blocking_nb, b->tx_fwd_offload, extack); err = notifier_from_errno(err); break; case SWITCHDEV_BRPORT_UNOFFLOADED: brport_info = ptr; b = &brport_info->brport; br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb, b->blocking_nb); break; case SWITCHDEV_BRPORT_REPLAY: brport_info = ptr; b = &brport_info->brport; err = br_switchdev_port_replay(p, b->dev, b->ctx, b->atomic_nb, b->blocking_nb, extack); err = notifier_from_errno(err); break; } out: return err; } static struct notifier_block br_switchdev_blocking_notifier = { .notifier_call = br_switchdev_blocking_event, }; /* br_boolopt_toggle - change user-controlled boolean option * * @br: bridge device * @opt: id of the option to change * @on: new option value * @extack: extack for error messages * * Changes the value of the respective boolean option to @on taking care of * any internal option value mapping and configuration. */ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, struct netlink_ext_ack *extack) { int err = 0; switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: br_opt_toggle(br, BROPT_NO_LL_LEARN, on); break; case BR_BOOLOPT_MCAST_VLAN_SNOOPING: err = br_multicast_toggle_vlan_snooping(br, on, extack); break; case BR_BOOLOPT_MST_ENABLE: err = br_mst_set_enabled(br, on, extack); break; default: /* shouldn't be called with unsupported options */ WARN_ON(1); break; } return err; } int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) { switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: return br_opt_get(br, BROPT_NO_LL_LEARN); case BR_BOOLOPT_MCAST_VLAN_SNOOPING: return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); case BR_BOOLOPT_MST_ENABLE: return br_opt_get(br, BROPT_MST_ENABLED); default: /* shouldn't be called with unsupported options */ WARN_ON(1); break; } return 0; } int br_boolopt_multi_toggle(struct net_bridge *br, struct br_boolopt_multi *bm, struct netlink_ext_ack *extack) { unsigned long bitmap = bm->optmask; int err = 0; int opt_id; for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) { bool on = !!(bm->optval & BIT(opt_id)); err = br_boolopt_toggle(br, opt_id, on, extack); if (err) { br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n", opt_id, br_boolopt_get(br, opt_id), on, err); break; } } return err; } void br_boolopt_multi_get(const struct net_bridge *br, struct br_boolopt_multi *bm) { u32 optval = 0; int opt_id; for (opt_id = 0; opt_id < BR_BOOLOPT_MAX; opt_id++) optval |= (br_boolopt_get(br, opt_id) << opt_id); bm->optval = optval; bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0); } /* private bridge options, controlled by the kernel */ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) { bool cur = !!br_opt_get(br, opt); br_debug(br, "toggle option: %d state: %d -> %d\n", opt, cur, on); if (cur == on) return; if (on) set_bit(opt, &br->options); else clear_bit(opt, &br->options); } static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { struct net_device *dev; struct net *net; ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) for_each_netdev(net, dev) if (netif_is_bridge_master(dev)) br_dev_delete(dev, dev_to_kill); } static struct pernet_operations br_net_ops = { .exit_batch_rtnl = br_net_exit_batch_rtnl, }; static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; static int __init br_init(void) { int err; BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > sizeof_field(struct sk_buff, cb)); err = stp_proto_register(&br_stp_proto); if (err < 0) { pr_err("bridge: can't register sap for STP\n"); return err; } err = br_fdb_init(); if (err) goto err_out; err = register_pernet_subsys(&br_net_ops); if (err) goto err_out1; err = br_nf_core_init(); if (err) goto err_out2; err = register_netdevice_notifier(&br_device_notifier); if (err) goto err_out3; err = register_switchdev_notifier(&br_switchdev_notifier); if (err) goto err_out4; err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); if (err) goto err_out5; err = br_netlink_init(); if (err) goto err_out6; brioctl_set(br_ioctl_stub); #if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = br_fdb_test_addr; #endif #if IS_MODULE(CONFIG_BRIDGE_NETFILTER) pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " "by default. Update your scripts to load br_netfilter if you " "need this.\n"); #endif return 0; err_out6: unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); err_out5: unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: br_nf_core_fini(); err_out2: unregister_pernet_subsys(&br_net_ops); err_out1: br_fdb_fini(); err_out: stp_proto_unregister(&br_stp_proto); return err; } static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_nf_core_fini(); #if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = NULL; #endif br_fdb_fini(); } module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); MODULE_VERSION(BR_VERSION); MODULE_ALIAS_RTNL_LINK("bridge"); MODULE_DESCRIPTION("Ethernet bridge driver");
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Weighted Fail Over module * * Authors: Kenny Mathis <kmathis@chokepoint.net> * * Changes: * Kenny Mathis : added initial functionality based on weight */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> /* Weighted Fail Over Module */ static struct ip_vs_dest * ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *hweight = NULL; int hw = 0; /* Track highest weight */ IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n"); /* Basic failover functionality * Find virtual server with highest weight and send it traffic */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > hw) { hweight = dest; hw = atomic_read(&dest->weight); } } if (hweight) { IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n", IP_VS_DBG_ADDR(hweight->af, &hweight->addr), ntohs(hweight->port), atomic_read(&hweight->activeconns), atomic_read(&hweight->weight)); return hweight; } ip_vs_scheduler_err(svc, "no destination available"); return NULL; } static struct ip_vs_scheduler ip_vs_fo_scheduler = { .name = "fo", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list), .schedule = ip_vs_fo_schedule, }; static int __init ip_vs_fo_init(void) { return register_ip_vs_scheduler(&ip_vs_fo_scheduler); } static void __exit ip_vs_fo_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_fo_scheduler); synchronize_rcu(); } module_init(ip_vs_fo_init); module_exit(ip_vs_fo_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs weighted failover scheduler");
21 21 21 7 21 21 7 7 7 7 11 508 507 506 92 91 1 511 509 510 473 473 1 1 1 107 107 106 81 80 1 1 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 // SPDX-License-Identifier: GPL-2.0 /* * Implementation of the diskquota system for the LINUX operating system. QUOTA * is implemented using the BSD system call interface as the means of * communication with the user level. This file contains the generic routines * called by the different filesystems on allocation of an inode or block. * These routines take care of the administration needed to have a consistent * diskquota tracking system. The ideas of both user and group quotas are based * on the Melbourne quota system as used on BSD derived systems. The internal * implementation is based on one of the several variants of the LINUX * inode-subsystem with added complexity of the diskquota system. * * Author: Marco van Wieringen <mvw@planets.elm.net> * * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 * * Revised list management to avoid races * -- Bill Hawes, <whawes@star.net>, 9/98 * * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...(). * As the consequence the locking was moved from dquot_decr_...(), * dquot_incr_...() to calling functions. * invalidate_dquots() now writes modified dquots. * Serialized quota_off() and quota_on() for mount point. * Fixed a few bugs in grow_dquots(). * Fixed deadlock in write_dquot() - we no longer account quotas on * quota files * remove_dquot_ref() moved to inode.c - it now traverses through inodes * add_dquot_ref() restarts after blocking * Added check for bogus uid and fixed check for group in quotactl. * Jan Kara, <jack@suse.cz>, sponsored by SuSE CR, 10-11/99 * * Used struct list_head instead of own list struct * Invalidation of referenced dquots is no longer possible * Improved free_dquots list management * Quota and i_blocks are now updated in one place to avoid races * Warnings are now delayed so we won't block in critical section * Write updated not to require dquot lock * Jan Kara, <jack@suse.cz>, 9/2000 * * Added dynamic quota structure allocation * Jan Kara <jack@suse.cz> 12/2000 * * Rewritten quota interface. Implemented new quota format and * formats registering. * Jan Kara, <jack@suse.cz>, 2001,2002 * * New SMP locking. * Jan Kara, <jack@suse.cz>, 10/2002 * * Added journalled quota support, fix lock inversion problems * Jan Kara, <jack@suse.cz>, 2003,2004 * * (C) Copyright 1994 - 1997 Marco van Wieringen */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/mm.h> #include <linux/time.h> #include <linux/types.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/tty.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/kmod.h> #include <linux/namei.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/blkdev.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> /* * There are five quota SMP locks: * * dq_list_lock protects all lists with quotas and quota formats. * * dquot->dq_dqb_lock protects data from dq_dqb * * inode->i_lock protects inode->i_blocks, i_bytes and also guards * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that * dquot_transfer() can stabilize amount it transfers * * dq_data_lock protects mem_dqinfo structures and modifications of dquot * pointers in the inode * * dq_state_lock protects modifications of quota state (on quotaon and * quotaoff) and readers who care about latest values take it as well. * * The spinlock ordering is hence: * dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock, * dq_list_lock > dq_state_lock * * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * * Operation accessing dquots via inode pointers are protected by dquot_srcu. * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and * synchronize_srcu(&dquot_srcu) is called after clearing pointers from * inode and before dropping dquot references to avoid use of dquots after * they are freed. dq_data_lock is used to serialize the pointer setting and * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to * memory (or space for it is being allocated) on the first dqget(), when it is * being written out, and when it is being released on the last dqput(). The * allocation and release operations are serialized by the dq_lock and by * checking the use count in dquot_release(). * * Lock ordering (including related VFS locks) is the following: * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); DEFINE_STATIC_SRCU(dquot_srcu); static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) { if (printk_ratelimit()) { va_list args; struct va_format vaf; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_ERR "Quota error (device %s): %s: %pV\n", sb->s_id, func, &vaf); va_end(args); } } EXPORT_SYMBOL(__quota_error); #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; #endif static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; /* SLAB cache for dquot structures */ static struct kmem_cache *dquot_cachep; void register_quota_format(struct quota_format_type *fmt) { spin_lock(&dq_list_lock); fmt->qf_next = quota_formats; quota_formats = fmt; spin_unlock(&dq_list_lock); } EXPORT_SYMBOL(register_quota_format); void unregister_quota_format(struct quota_format_type *fmt) { struct quota_format_type **actqf; spin_lock(&dq_list_lock); for (actqf = &quota_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next) ; if (*actqf) *actqf = (*actqf)->qf_next; spin_unlock(&dq_list_lock); } EXPORT_SYMBOL(unregister_quota_format); static struct quota_format_type *find_quota_format(int id) { struct quota_format_type *actqf; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (!actqf || !try_module_get(actqf->qf_owner)) { int qm; spin_unlock(&dq_list_lock); for (qm = 0; module_names[qm].qm_fmt_id && module_names[qm].qm_fmt_id != id; qm++) ; if (!module_names[qm].qm_fmt_id || request_module(module_names[qm].qm_mod_name)) return NULL; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (actqf && !try_module_get(actqf->qf_owner)) actqf = NULL; } spin_unlock(&dq_list_lock); return actqf; } static void put_quota_format(struct quota_format_type *fmt) { module_put(fmt->qf_owner); } /* * Dquot List Management: * The quota code uses five lists for dquot management: the inuse_list, * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array. * A single dquot structure may be on some of those lists, depending on * its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * * When the last reference of a dquot is dropped, the dquot is added to * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the * dquots on the list. Each cleaned up dquot is moved to free_dquots list. * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot * struct. * * Unused and cleaned up dquots are in the free_dquots list and this list is * searched whenever we need an available dquot. Dquots are removed from the * list as soon as they are used again and dqstats.free_dquots gives the number * of dquots on the list. When dquot is invalidated it's completely released * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to * quota file. Note that some filesystems do dirty dquot tracking on their * own (e.g. in a journal) and thus don't use dqi_dirty_list. * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. */ static LIST_HEAD(inuse_list); static LIST_HEAD(free_dquots); static LIST_HEAD(releasing_dquots); static unsigned int dq_hash_bits, dq_hash_mask; static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); static qsize_t __inode_get_rsv_space(struct inode *inode); static int __dquot_initialize(struct inode *inode, int type); static void quota_release_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn); static inline unsigned int hashfn(const struct super_block *sb, struct kqid qid) { unsigned int id = from_kqid(&init_user_ns, qid); int type = qid.type; unsigned long tmp; tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask; } /* * Following list functions expect dq_list_lock to be held */ static inline void insert_dquot_hash(struct dquot *dquot) { struct hlist_head *head; head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id); hlist_add_head(&dquot->dq_hash, head); } static inline void remove_dquot_hash(struct dquot *dquot) { hlist_del_init(&dquot->dq_hash); } static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, struct kqid qid) { struct dquot *dquot; hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash) if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; return NULL; } /* Add a dquot to the tail of the free list */ static inline void put_dquot_last(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &free_dquots); dqstats_inc(DQST_FREE_DQUOTS); } static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) { if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); else clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) { /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ list_add_tail(&dquot->dq_inuse, &inuse_list); dqstats_inc(DQST_ALLOC_DQUOTS); } static inline void remove_inuse(struct dquot *dquot) { dqstats_dec(DQST_ALLOC_DQUOTS); list_del(&dquot->dq_inuse); } /* * End of list functions needing dq_list_lock */ static void wait_on_dquot(struct dquot *dquot) { mutex_lock(&dquot->dq_lock); mutex_unlock(&dquot->dq_lock); } static inline int dquot_active(struct dquot *dquot) { return test_bit(DQ_ACTIVE_B, &dquot->dq_flags); } static inline int dquot_dirty(struct dquot *dquot) { return test_bit(DQ_MOD_B, &dquot->dq_flags); } static inline int mark_dquot_dirty(struct dquot *dquot) { return dquot->dq_sb->dq_op->mark_dirty(dquot); } /* Mark dquot dirty in atomic manner, and return it's old dirty flag state */ int dquot_mark_dquot_dirty(struct dquot *dquot) { int ret = 1; if (!dquot_active(dquot)) return 0; if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags); /* If quota is dirty already, we don't have to acquire dq_list_lock */ if (dquot_dirty(dquot)) return 1; spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> info[dquot->dq_id.type].dqi_dirty_list); ret = 0; } spin_unlock(&dq_list_lock); return ret; } EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) /* Even in case of error we have to continue */ ret = mark_dquot_dirty(dquot); if (!err && ret < 0) err = ret; } return err; } static inline void dqput_all(struct dquot **dquot) { unsigned int cnt; for (cnt = 0; cnt < MAXQUOTAS; cnt++) dqput(dquot[cnt]); } static inline int clear_dquot_dirty(struct dquot *dquot) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags); spin_lock(&dq_list_lock); if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); return 0; } list_del_init(&dquot->dq_dirty); spin_unlock(&dq_list_lock); return 1; } void mark_info_dirty(struct super_block *sb, int type) { spin_lock(&dq_data_lock); sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY; spin_unlock(&dq_data_lock); } EXPORT_SYMBOL(mark_info_dirty); /* * Read dquot from disk and alloc space for it */ int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) goto out_iolock; } /* Make sure flags update is visible after dquot has been filled */ smp_mb__before_atomic(); set_bit(DQ_READ_B, &dquot->dq_flags); /* Instantiate dquot if needed */ if (!dquot_active(dquot) && !dquot->dq_off) { ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); /* Write the info if needed */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret < 0) goto out_iolock; if (ret2 < 0) { ret = ret2; goto out_iolock; } } /* * Make sure flags update is visible after on-disk struct has been * allocated. Paired with smp_rmb() in dqget(). */ smp_mb__before_atomic(); set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_acquire); /* * Write dquot to disk */ int dquot_commit(struct dquot *dquot) { int ret = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!clear_dquot_dirty(dquot)) goto out_lock; /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ if (dquot_active(dquot)) ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); else ret = -EIO; out_lock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_commit); /* * Release dquot */ int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); /* Check whether we are not racing with some other dqget() */ if (dquot_is_busy(dquot)) goto out_dqlock; if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); /* Write the info */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret >= 0) ret = ret2; } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_dqlock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_release); void dquot_destroy(struct dquot *dquot) { kmem_cache_free(dquot_cachep, dquot); } EXPORT_SYMBOL(dquot_destroy); static inline void do_destroy_dquot(struct dquot *dquot) { dquot->dq_sb->dq_op->destroy_dquot(dquot); } /* Invalidate all dquots on the list. Note that this function is called after * quota is disabled and pointers from inodes removed so there cannot be new * quota users. There can still be some users of quotas due to inodes being * just deleted or pruned by prune_icache() (those are not attached to any * list) or parallel quotactl call. We have to wait for such users. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot, *tmp; restart: flush_delayed_work(&quota_release_work); spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; if (dquot->dq_id.type != type) continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* * Once dqput() wakes us up, we know it's time to free * the dquot. * IMPORTANT: we rely on the fact that there is always * at most one process waiting for dquot to free. * Otherwise dq_count would be > 1 and we would never * wake up. */ wait_event(dquot_ref_wq, atomic_read(&dquot->dq_count) == 1); dqput(dquot); /* At this moment dquot() need not exist (it could be * reclaimed by prune_dqcache(). Hence we must * restart. */ goto restart; } /* * The last user already dropped its reference but dquot didn't * get fully cleaned up yet. Restart the scan which flushes the * work cleaning up released dquots. */ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); goto restart; } /* * Quota now has no users and it has been written on last * dqput() */ remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); } spin_unlock(&dq_list_lock); } /* Call callback for every active dquot on given filesystem */ int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv) { struct dquot *dquot, *old_dquot = NULL; int ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); spin_lock(&dq_list_lock); list_for_each_entry(dquot, &inuse_list, dq_inuse) { if (!dquot_active(dquot)) continue; if (dquot->dq_sb != sb) continue; /* Now we have active dquot so we can just increase use count */ atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqput(old_dquot); old_dquot = dquot; /* * ->release_dquot() can be racing with us. Our reference * protects us from new calls to it so just wait for any * outstanding call and recheck the DQ_ACTIVE_B after that. */ wait_on_dquot(dquot); if (dquot_active(dquot)) { ret = fn(dquot, priv); if (ret < 0) goto out; } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ } spin_unlock(&dq_list_lock); out: dqput(old_dquot); return ret; } EXPORT_SYMBOL(dquot_scan_active); static inline int dquot_write_dquot(struct dquot *dquot) { int ret = dquot->dq_sb->dq_op->write_dquot(dquot); if (ret < 0) { quota_error(dquot->dq_sb, "Can't write quota structure " "(error %d). Quota may get out of sync!", ret); /* Clear dirty bit anyway to avoid infinite loop. */ clear_dquot_dirty(dquot); } return ret; } /* Write all dquot structures to quota files */ int dquot_writeback_dquots(struct super_block *sb, int type) { struct list_head dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; int err, ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); flush_delayed_work(&quota_release_work); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); /* Move list away to avoid livelock. */ list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty); while (!list_empty(&dirty)) { dquot = list_first_entry(&dirty, struct dquot, dq_dirty); WARN_ON(!dquot_active(dquot)); /* If the dquot is releasing we should not touch it */ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); flush_delayed_work(&quota_release_work); spin_lock(&dq_list_lock); continue; } /* Now we have active dquot from which someone is * holding reference so we can safely just increase * use count */ dqgrab(dquot); spin_unlock(&dq_list_lock); err = dquot_write_dquot(dquot); if (err && !ret) ret = err; dqput(dquot); spin_lock(&dq_list_lock); } spin_unlock(&dq_list_lock); } for (cnt = 0; cnt < MAXQUOTAS; cnt++) if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); dqstats_inc(DQST_SYNCS); return ret; } EXPORT_SYMBOL(dquot_writeback_dquots); /* Write all dquot structures to disk and make them visible from userspace */ int dquot_quota_sync(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) return ret; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) return 0; /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ if (sb->s_op->sync_fs) { ret = sb->s_op->sync_fs(sb, 1); if (ret) return ret; } ret = sync_blockdev(sb->s_bdev); if (ret) return ret; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } return 0; } EXPORT_SYMBOL(dquot_quota_sync); static unsigned long dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct dquot *dquot; unsigned long freed = 0; spin_lock(&dq_list_lock); while (!list_empty(&free_dquots) && sc->nr_to_scan) { dquot = list_first_entry(&free_dquots, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; } spin_unlock(&dq_list_lock); return freed; } static unsigned long dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio( percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } /* * Safely release dquot and put reference to dquot. */ static void quota_release_workfn(struct work_struct *work) { struct dquot *dquot; struct list_head rls_head; spin_lock(&dq_list_lock); /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); synchronize_srcu(&dquot_srcu); restart: spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); WARN_ON_ONCE(atomic_read(&dquot->dq_count)); /* * Note that DQ_RELEASING_B protects us from racing with * invalidate_dquots() calls so we are safe to work with the * dquot even after we drop dq_list_lock. */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ dquot_write_dquot(dquot); goto restart; } if (dquot_active(dquot)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); goto restart; } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); } /* * Put reference to dquot */ void dqput(struct dquot *dquot) { if (!dquot) return; #ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", quotatypes[dquot->dq_id.type], from_kqid(&init_user_ns, dquot->dq_id)); BUG(); } #endif dqstats_inc(DQST_DROPS); spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot_ref_wq); spin_unlock(&dq_list_lock); return; } /* Need to release dquot? */ WARN_ON_ONCE(!list_empty(&dquot->dq_free)); put_releasing_dquots(dquot); atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, &quota_release_work, 1); } EXPORT_SYMBOL(dqput); struct dquot *dquot_alloc(struct super_block *sb, int type) { return kmem_cache_zalloc(dquot_cachep, GFP_NOFS); } EXPORT_SYMBOL(dquot_alloc); static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; dquot = sb->dq_op->alloc_dquot(sb, type); if(!dquot) return NULL; mutex_init(&dquot->dq_lock); INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); dquot->dq_sb = sb; dquot->dq_id = make_kqid_invalid(type); atomic_set(&dquot->dq_count, 1); spin_lock_init(&dquot->dq_dqb_lock); return dquot; } /* * Get reference to dquot * * Locking is slightly tricky here. We are guarded from parallel quotaoff() * destroying our dquot by: * a) checking for quota flags under dq_list_lock and * b) getting a reference to dquot before we release dq_list_lock */ struct dquot *dqget(struct super_block *sb, struct kqid qid) { unsigned int hashent = hashfn(sb, qid); struct dquot *dquot, *empty = NULL; if (!qid_has_mapping(sb->s_user_ns, qid)) return ERR_PTR(-EINVAL); if (!sb_has_quota_active(sb, qid.type)) return ERR_PTR(-ESRCH); we_slept: spin_lock(&dq_list_lock); spin_lock(&dq_state_lock); if (!sb_has_quota_active(sb, qid.type)) { spin_unlock(&dq_state_lock); spin_unlock(&dq_list_lock); dquot = ERR_PTR(-ESRCH); goto out; } spin_unlock(&dq_state_lock); dquot = find_dquot(hashent, sb, qid); if (!dquot) { if (!empty) { spin_unlock(&dq_list_lock); empty = get_empty_dquot(sb, qid.type); if (!empty) schedule(); /* Try to wait for a moment... */ goto we_slept; } dquot = empty; empty = NULL; dquot->dq_id = qid; /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqstats_inc(DQST_CACHE_HITS); dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { int err; err = sb->dq_op->acquire_dquot(dquot); if (err < 0) { dqput(dquot); dquot = ERR_PTR(err); goto out; } } /* * Make sure following reads see filled structure - paired with * smp_mb__before_atomic() in dquot_acquire(). */ smp_rmb(); /* Has somebody invalidated entry under us? */ WARN_ON_ONCE(hlist_unhashed(&dquot->dq_hash)); out: if (empty) do_destroy_dquot(empty); return dquot; } EXPORT_SYMBOL(dqget); static inline struct dquot __rcu **i_dquot(struct inode *inode) { return inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return 0; dquots = i_dquot(inode); if (type != -1) return !dquots[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!dquots[cnt]) return 1; return 0; } /* This routine is guarded by s_umount semaphore */ static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || !atomic_read(&inode->i_writecount) || !dqinit_needed(inode, type)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif iput(old_inode); err = __dquot_initialize(inode, type); if (err) { iput(inode); goto out; } /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the * s_inode_list_lock. We cannot iput the inode now as we can be * holding the last reference and we cannot iput it under * s_inode_list_lock. So we keep the reference and iput it * later. */ old_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " "thus quota information is probably inconsistent. " "Please run quotacheck(8)"); } #endif return err; } static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking * (dq_data_lock). */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { struct dquot __rcu **dquots = i_dquot(inode); struct dquot *dquot = srcu_dereference_check( dquots[type], &dquot_srcu, lockdep_is_held(&dq_data_lock)); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif rcu_assign_pointer(dquots[type], NULL); if (dquot) dqput(dquot); } spin_unlock(&dq_data_lock); } spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened after quota" " was disabled thus quota information is probably " "inconsistent. Please run quotacheck(8).\n", sb->s_id); } #endif } /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { if (sb->dq_op) remove_dquot_ref(sb, type); } static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { if (dquot->dq_dqb.dqb_rsvspace >= number) dquot->dq_dqb.dqb_rsvspace -= number; else { WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curinodes >= number) dquot->dq_dqb.dqb_curinodes -= number; else dquot->dq_dqb.dqb_curinodes = 0; if (dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time64_t) 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } static void dquot_decr_space(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curspace >= number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } struct dquot_warn { struct super_block *w_sb; struct kqid w_dq_id; short w_type; }; static int warning_issued(struct dquot *dquot, const int warntype) { int flag = (warntype == QUOTA_NL_BHARDWARN || warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B : ((warntype == QUOTA_NL_IHARDWARN || warntype == QUOTA_NL_ISOFTLONGWARN) ? DQ_INODES_B : 0); if (!flag) return 0; return test_and_set_bit(flag, &dquot->dq_flags); } #ifdef CONFIG_PRINT_QUOTA_WARNING static int flag_print_warnings = 1; static int need_print_warning(struct dquot_warn *warn) { if (!flag_print_warnings) return 0; switch (warn->w_dq_id.type) { case USRQUOTA: return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); case PRJQUOTA: return 1; } return 0; } /* Print warning to user which exceeded quota */ static void print_warning(struct dquot_warn *warn) { char *msg = NULL; struct tty_struct *tty; int warntype = warn->w_type; if (warntype == QUOTA_NL_IHARDBELOW || warntype == QUOTA_NL_ISOFTBELOW || warntype == QUOTA_NL_BHARDBELOW || warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn)) return; tty = get_current_tty(); if (!tty) return; tty_write_message(tty, warn->w_sb->s_id); if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) tty_write_message(tty, ": warning, "); else tty_write_message(tty, ": write failed, "); tty_write_message(tty, quotatypes[warn->w_dq_id.type]); switch (warntype) { case QUOTA_NL_IHARDWARN: msg = " file limit reached.\r\n"; break; case QUOTA_NL_ISOFTLONGWARN: msg = " file quota exceeded too long.\r\n"; break; case QUOTA_NL_ISOFTWARN: msg = " file quota exceeded.\r\n"; break; case QUOTA_NL_BHARDWARN: msg = " block limit reached.\r\n"; break; case QUOTA_NL_BSOFTLONGWARN: msg = " block quota exceeded too long.\r\n"; break; case QUOTA_NL_BSOFTWARN: msg = " block quota exceeded.\r\n"; break; } tty_write_message(tty, msg); tty_kref_put(tty); } #endif static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, int warntype) { if (warning_issued(dquot, warntype)) return; warn->w_type = warntype; warn->w_sb = dquot->dq_sb; warn->w_dq_id = dquot->dq_id; } /* * Write warnings to the console and send warning messages over netlink. * * Note that this function can call into tty and networking code. */ static void flush_warnings(struct dquot_warn *warn) { int i; for (i = 0; i < MAXQUOTAS; i++) { if (warn[i].w_type == QUOTA_NL_NOWARN) continue; #ifdef CONFIG_PRINT_QUOTA_WARNING print_warning(&warn[i]); #endif quota_send_warning(warn[i].w_dq_id, warn[i].w_sb->s_dev, warn[i].w_type); } } static int ignore_hardlimit(struct dquot *dquot) { struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & DQF_ROOT_SQUASH)); } static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes, struct dquot_warn *warn) { qsize_t newinodes; int ret = 0; spin_lock(&dquot->dq_dqb_lock); newinodes = dquot->dq_dqb.dqb_curinodes + inodes; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto add; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime == 0) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; } add: dquot->dq_dqb.dqb_curinodes = newinodes; out: spin_unlock(&dquot->dq_dqb_lock); return ret; } static int dquot_add_space(struct dquot *dquot, qsize_t space, qsize_t rsv_space, unsigned int flags, struct dquot_warn *warn) { qsize_t tspace; struct super_block *sb = dquot->dq_sb; int ret = 0; spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime == 0) { if (flags & DQUOT_SPACE_WARN) { prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() + sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; } else { /* * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ ret = -EDQUOT; goto finish; } } finish: /* * We have to be careful and go through warning generation & grace time * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it * only here... */ if (flags & DQUOT_SPACE_NOFAIL) ret = 0; if (!ret) { dquot->dq_dqb.dqb_rsvspace += rsv_space; dquot->dq_dqb.dqb_curspace += space; } spin_unlock(&dquot->dq_dqb_lock); return ret; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) { qsize_t newinodes; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type)) return QUOTA_NL_NOWARN; newinodes = dquot->dq_dqb.dqb_curinodes - inodes; if (newinodes <= dquot->dq_dqb.dqb_isoftlimit) return QUOTA_NL_ISOFTBELOW; if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && newinodes < dquot->dq_dqb.dqb_ihardlimit) return QUOTA_NL_IHARDBELOW; return QUOTA_NL_NOWARN; } static int info_bdq_free(struct dquot *dquot, qsize_t space) { qsize_t tspace; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; if (tspace >= dquot->dq_dqb.dqb_bhardlimit && tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } static int inode_quota_active(const struct inode *inode) { struct super_block *sb = inode->i_sb; if (IS_NOQUOTA(inode)) return 0; return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); } /* * Initialize quota pointers in inode * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. */ static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; struct dquot __rcu **dquots; struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; if (!inode_quota_active(inode)) return 0; dquots = i_dquot(inode); /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct kqid qid; kprojid_t projid; int rc; struct dquot *dquot; if (type != -1 && cnt != type) continue; /* * The i_dquot should have been initialized in most cases, * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ if (dquots[cnt]) continue; if (!sb_has_quota_active(sb, cnt)) continue; init_needed = 1; switch (cnt) { case USRQUOTA: qid = make_kqid_uid(inode->i_uid); break; case GRPQUOTA: qid = make_kqid_gid(inode->i_gid); break; case PRJQUOTA: rc = inode->i_sb->dq_op->get_projid(inode, &projid); if (rc) continue; qid = make_kqid_projid(projid); break; } dquot = dqget(sb, qid); if (IS_ERR(dquot)) { /* We raced with somebody turning quotas off... */ if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } got[cnt] = dquot; } /* All required i_dquot has been initialized */ if (!init_needed) return 0; spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_lock; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(sb, cnt)) continue; /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; if (!dquots[cnt]) { rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { struct dquot *dquot = srcu_dereference_check( dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); spin_lock(&dquot->dq_dqb_lock); dquot->dq_dqb.dqb_rsvspace += rsv; spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } } out_lock: spin_unlock(&dq_data_lock); out_put: /* Drop unused references */ dqput_all(got); return ret; } int dquot_initialize(struct inode *inode) { return __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) return false; dquots = i_dquot(inode); for (i = 0; i < MAXQUOTAS; i++) if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) return true; return false; } EXPORT_SYMBOL(dquot_initialize_needed); /* * Release all quotas referenced by inode. * * This function only be called on inode free or converting * a file to quota file, no other users for the i_dquot in * both cases, so we needn't call synchronize_srcu() after * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); } void dquot_drop(struct inode *inode) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return; /* * Test before calling to rule out calls from proc and such * where we are not allowed to block. Note that this is * actually reliable test even without the lock - the caller * must assure that nobody can come after the DQUOT_DROP and * add quota pointers back anyway. */ dquots = i_dquot(inode); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (dquots[cnt]) break; } if (cnt < MAXQUOTAS) __dquot_drop(inode); } EXPORT_SYMBOL(dquot_drop); /* * inode_reserved_space is managed internally by quota, and protected by * i_lock similar to i_blocks+i_bytes. */ static qsize_t *inode_reserved_space(struct inode * inode) { /* Filesystem must explicitly define it's own method in order to use * quota reservation interface */ BUG_ON(!inode->i_sb->dq_op->get_reserved_space); return inode->i_sb->dq_op->get_reserved_space(inode); } static qsize_t __inode_get_rsv_space(struct inode *inode) { if (!inode->i_sb->dq_op->get_reserved_space) return 0; return *inode_reserved_space(inode); } static qsize_t inode_get_rsv_space(struct inode *inode) { qsize_t ret; if (!inode->i_sb->dq_op->get_reserved_space) return 0; spin_lock(&inode->i_lock); ret = __inode_get_rsv_space(inode); spin_unlock(&inode->i_lock); return ret; } /* * This functions updates i_blocks+i_bytes fields and quota information * (together with appropriate checks). * * NOTE: We absolutely rely on the fact that caller dirties the inode * (usually helpers in quotaops.h care about this) and holds a handle for * the current transaction so that dquot write and inode write go into the * same transaction. */ /* * This operation can block, but only after everything is updated */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot __rcu **dquots; struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; spin_unlock(&inode->i_lock); } else { inode_add_bytes(inode, number); } goto out; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; if (reserve) { ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; } } if (reserve) *inode_reserved_space(inode) += number; else __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_flush_warn; ret = mark_all_dquot_dirty(dquots); out_flush_warn: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; } EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; if (!inode_quota_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; /* Back out changes we already did */ spin_lock(&dquot->dq_dqb_lock); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } } warn_put_all: spin_unlock(&inode->i_lock); if (ret == 0) ret = mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } EXPORT_SYMBOL(dquot_alloc_inode); /* * Convert in-memory reserved quotas to real consumed quotas */ void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_rsvspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * Convert allocated space back to in-memory reserved quotas */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; dquot->dq_dqb.dqb_rsvspace += number; dquot->dq_dqb.dqb_curspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); /* * This operation can block, but only after everything is updated */ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } else { inode_sub_bytes(inode, number); } return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; else __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; int index; if (!inode_quota_active(inode)) return; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); /* * Transfer the number of inode and blocks from one diskquota to an other. * On success, dquot references in transfer_to are consumed and references * to original dquots that need to be released are placed there. On failure, * references are kept untouched. * * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * * We are holding reference on transfer_from & transfer_to, no need to * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0, err; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; if (IS_NOQUOTA(inode)) return 0; if (inode->i_sb->dq_op->get_inode_usage) { ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage); if (ret) return ret; } /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { warn_to[cnt].w_type = QUOTA_NL_NOWARN; warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } spin_lock(&dq_data_lock); spin_lock(&inode->i_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); return 0; } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. */ if (!transfer_to[cnt]) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; transfer_from[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) goto over_quota; ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); goto over_quota; } } /* Decrease usage for source structures and update quota pointers */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!is_valid[cnt]) continue; /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { int wtype; spin_lock(&transfer_from[cnt]->dq_dqb_lock); wtype = info_idq_free(transfer_from[cnt], inode_usage); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_inodes[cnt], transfer_from[cnt], wtype); wtype = info_bdq_free(transfer_from[cnt], cur_space + rsv_space); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_space[cnt], transfer_from[cnt], wtype); dquot_decr_inodes(transfer_from[cnt], inode_usage); dquot_decr_space(transfer_from[cnt], cur_space); dquot_free_reserved_space(transfer_from[cnt], rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); /* * These arrays are local and we hold dquot references so we don't need * the srcu protection but still take dquot_srcu to avoid warning in * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); if (err < 0) ret = err; err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); if (err < 0) ret = err; srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); /* Pass back references to put */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (is_valid[cnt]) transfer_to[cnt] = transfer_from[cnt]; return ret; over_quota: /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { if (!is_valid[cnt]) continue; spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); dquot_decr_space(transfer_to[cnt], cur_space); dquot_free_reserved_space(transfer_to[cnt], rsv_space); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); flush_warnings(warn_to); return ret; } EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; struct dquot *dquot; struct super_block *sb = inode->i_sb; int ret; if (!inode_quota_active(inode)) return 0; if (i_uid_needs_update(idmap, iattr, inode)) { kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); dquot = dqget(sb, make_kqid_uid(kuid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[USRQUOTA] = dquot; } if (i_gid_needs_update(idmap, iattr, inode)) { kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); dquot = dqget(sb, make_kqid_gid(kgid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[GRPQUOTA] = dquot; } ret = __dquot_transfer(inode, transfer_to); out_put: dqput_all(transfer_to); return ret; } EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk */ int dquot_commit_info(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); return dqopt->ops[type]->write_file_info(sb, type); } EXPORT_SYMBOL(dquot_commit_info); int dquot_get_next_id(struct super_block *sb, struct kqid *qid) { struct quota_info *dqopt = sb_dqopt(sb); if (!sb_has_quota_active(sb, qid->type)) return -ESRCH; if (!dqopt->ops[qid->type]->get_next_id) return -ENOSYS; return dqopt->ops[qid->type]->get_next_id(sb, qid); } EXPORT_SYMBOL(dquot_get_next_id); /* * Definitions of diskquota operations. */ const struct dquot_operations dquot_operations = { .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, .mark_dirty = dquot_mark_dquot_dirty, .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; EXPORT_SYMBOL(dquot_operations); /* * Generic helper for ->open on filesystems supporting disk quotas. */ int dquot_file_open(struct inode *inode, struct file *file) { int error; error = generic_file_open(inode, file); if (!error && (file->f_mode & FMODE_WRITE)) error = dquot_initialize(inode); return error; } EXPORT_SYMBOL(dquot_file_open); static void vfs_cleanup_quota_inode(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode = dqopt->files[type]; if (!inode) return; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { inode_lock(inode); inode->i_flags &= ~S_NOQUOTA; inode_unlock(inode); } dqopt->files[type] = NULL; iput(inode); } /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int dquot_disable(struct super_block *sb, int type, unsigned int flags) { int cnt; struct quota_info *dqopt = sb_dqopt(sb); rwsem_assert_held_write(&sb->s_umount); /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED))) return -EINVAL; /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ if (!sb_any_quota_loaded(sb)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_loaded(sb, cnt)) continue; if (flags & DQUOT_SUSPENDED) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); } else { spin_lock(&dq_state_lock); dqopt->flags &= ~dquot_state_flag(flags, cnt); /* Turning off suspended quotas? */ if (!sb_has_quota_loaded(sb, cnt) && sb_has_quota_suspended(sb, cnt)) { dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); vfs_cleanup_quota_inode(sb, cnt); continue; } spin_unlock(&dq_state_lock); } /* We still have to keep quota loaded? */ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED)) continue; /* Note: these are blocking operations */ drop_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); /* * Now all dquots should be invalidated, all writes done so we * should be only users of the info. No locks needed. */ if (info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) goto put_inodes; /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. We * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt) && dqopt->files[cnt]) { inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); put_inodes: /* We are done when suspending quotas */ if (flags & DQUOT_SUSPENDED) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt)) vfs_cleanup_quota_inode(sb, cnt); return 0; } EXPORT_SYMBOL(dquot_disable); int dquot_quota_off(struct super_block *sb, int type) { return dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); } EXPORT_SYMBOL(dquot_quota_off); /* * Turn quotas on on a device */ static int vfs_setup_quota_inode(struct inode *inode, int type) { struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); if (is_bad_inode(inode)) return -EUCLEAN; if (!S_ISREG(inode->i_mode)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; if (sb_has_quota_loaded(sb, type)) return -EBUSY; /* * Quota files should never be encrypted. They should be thought of as * filesystem metadata, not user data. New-style internal quota files * cannot be encrypted by users anyway, but old-style external quota * files could potentially be incorrectly created in an encrypted * directory, hence this explicit check. Some reasons why encrypted * quota files don't work include: (1) some filesystems that support * encryption don't handle it in their quota_read and quota_write, and * (2) cleaning up encrypted quota files at unmount would need special * consideration, as quota files are cleaned up later than user files. */ if (IS_ENCRYPTED(inode)) return -EINVAL; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) return -EIO; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more * references can be added */ __dquot_drop(inode); } return 0; } int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt; struct quota_info *dqopt = sb_dqopt(sb); int error; lockdep_assert_held_write(&sb->s_umount); /* Just unsuspend quotas? */ if (WARN_ON_ONCE(flags & DQUOT_SUSPENDED)) return -EINVAL; fmt = find_quota_format(format_id); if (!fmt) return -ESRCH; if (!sb->dq_op || !sb->s_qcop || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } /* Filesystems outside of init_user_ns not yet supported */ if (sb->s_user_ns != &init_user_ns) { error = -EINVAL; goto out_fmt; } /* Usage always has to be set... */ if (!(flags & DQUOT_USAGE_ENABLED)) { error = -EINVAL; goto out_fmt; } if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_fmt; } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the * dirty data and invalidate caches so that kernel sees * changes from userspace. It is not enough to just flush * the quota file since if blocksize < pagesize, invalidation * of the cache could fail because of other unrelated dirty * data */ sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; dqopt->info[type].dqi_fmt_id = format_id; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; spin_unlock(&dq_data_lock); } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); error = add_dquot_ref(sb, type); if (error) dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; out_fmt: put_quota_format(fmt); return error; } EXPORT_SYMBOL(dquot_load_quota_sb); /* * More powerful function for turning on quotas on given quota inode allowing * setting of individual quota flags */ int dquot_load_quota_inode(struct inode *inode, int type, int format_id, unsigned int flags) { int err; err = vfs_setup_quota_inode(inode, type); if (err < 0) return err; err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); if (err < 0) vfs_cleanup_quota_inode(inode->i_sb, type); return err; } EXPORT_SYMBOL(dquot_load_quota_inode); /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int ret = 0, cnt; unsigned int flags; rwsem_assert_held_write(&sb->s_umount); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_suspended(sb, cnt)) continue; spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); flags = dquot_generic_flag(flags, cnt); ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, flags); if (ret < 0) vfs_cleanup_quota_inode(sb, cnt); } return ret; } EXPORT_SYMBOL(dquot_resume); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int error = security_quota_on(path->dentry); if (error) return error; /* Quota file not on the same filesystem? */ if (path->dentry->d_sb != sb) error = -EXDEV; else error = dquot_load_quota_inode(d_inode(path->dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; } EXPORT_SYMBOL(dquot_quota_on); /* * This function is used when filesystem needs to initialize quotas * during mount time. */ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name)); if (IS_ERR(dentry)) return PTR_ERR(dentry); error = security_quota_on(dentry); if (!error) error = dquot_load_quota_inode(d_inode(dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); dput(dentry); return error; } EXPORT_SYMBOL(dquot_quota_on_mount); static int dquot_quota_enable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* Accounting cannot be turned on while fs is mounted */ flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); if (!flags) return -EINVAL; for (type = 0; type < MAXQUOTAS; type++) { if (!(flags & qtype_enforce_flag(type))) continue; /* Can't enforce without accounting */ if (!sb_has_quota_usage_enabled(sb, type)) { ret = -EINVAL; goto out_err; } if (sb_has_quota_limits_enabled(sb, type)) { /* compatible with XFS */ ret = -EEXIST; goto out_err; } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } return 0; out_err: /* Backout enforcement enablement we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } return ret; } static int dquot_quota_disable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* * We don't support turning off accounting via quotactl. In principle * quota infrastructure can do this but filesystems don't expect * userspace to be able to do it. */ if (flags & (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) return -EOPNOTSUPP; /* Filter out limits not enabled */ for (type = 0; type < MAXQUOTAS; type++) if (!sb_has_quota_limits_enabled(sb, type)) flags &= ~qtype_enforce_flag(type); /* Nothing left? */ if (!flags) return -EEXIST; for (type = 0; type < MAXQUOTAS; type++) { if (flags & qtype_enforce_flag(type)) { ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); if (ret < 0) goto out_err; } } return 0; out_err: /* Backout enforcement disabling we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } } return ret; } /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); spin_lock(&dquot->dq_dqb_lock); di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; di->d_ino_count = dm->dqb_curinodes; di->d_spc_timer = dm->dqb_btime; di->d_ino_timer = dm->dqb_itime; spin_unlock(&dquot->dq_dqb_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_dqblk); int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid, struct qc_dqblk *di) { struct dquot *dquot; int err; if (!sb->dq_op->get_next_id) return -ENOSYS; err = sb->dq_op->get_next_id(sb, qid); if (err < 0) return err; dquot = dqget(sb, *qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_next_dqblk); #define VFS_QC_MASK \ (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; int ret; if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; if (((di->d_fieldmask & QC_SPC_SOFT) && di->d_spc_softlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_SPC_HARD) && di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_INO_SOFT) && (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || ((di->d_fieldmask & QC_INO_HARD) && (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; spin_lock(&dquot->dq_dqb_lock); if (di->d_fieldmask & QC_SPACE) { dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_SOFT) dm->dqb_bsoftlimit = di->d_spc_softlimit; if (di->d_fieldmask & QC_SPC_HARD) dm->dqb_bhardlimit = di->d_spc_hardlimit; if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_COUNT) { dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_TIMER) { dm->dqb_btime = di->d_spc_timer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_TIMER) { dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } if (check_blim) { if (!dm->dqb_bsoftlimit || dm->dqb_curspace + dm->dqb_rsvspace <= dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = ktime_get_real_seconds() + dqi->dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes <= dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = ktime_get_real_seconds() + dqi->dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); spin_unlock(&dquot->dq_dqb_lock); ret = mark_dquot_dirty(dquot); if (ret < 0) return ret; return 0; } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; int rc; dquot = dqget(sb, qid); if (IS_ERR(dquot)) { rc = PTR_ERR(dquot); goto out; } rc = do_set_dqblk(dquot, di); dqput(dquot); out: return rc; } EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ int dquot_get_state(struct super_block *sb, struct qc_state *state) { struct mem_dqinfo *mi; struct qc_type_state *tstate; struct quota_info *dqopt = sb_dqopt(sb); int type; memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) continue; tstate = state->s_state + type; mi = sb_dqopt(sb)->info + type; tstate->flags = QCI_ACCT_ENABLED; spin_lock(&dq_data_lock); if (mi->dqi_flags & DQF_SYS_FILE) tstate->flags |= QCI_SYSFILE; if (mi->dqi_flags & DQF_ROOT_SQUASH) tstate->flags |= QCI_ROOT_SQUASH; if (sb_has_quota_limits_enabled(sb, type)) tstate->flags |= QCI_LIMITS_ENFORCED; tstate->spc_timelimit = mi->dqi_bgrace; tstate->ino_timelimit = mi->dqi_igrace; if (dqopt->files[type]) { tstate->ino = dqopt->files[type]->i_ino; tstate->blocks = dqopt->files[type]->i_blocks; } tstate->nextents = 1; /* We don't know... */ spin_unlock(&dq_data_lock); } return 0; } EXPORT_SYMBOL(dquot_get_state); /* Generic routine for setting common part of quota file information */ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) { struct mem_dqinfo *mi; if ((ii->i_fieldmask & QC_WARNS_MASK) || (ii->i_fieldmask & QC_RT_SPC_TIMER)) return -EINVAL; if (!sb_has_quota_active(sb, type)) return -ESRCH; mi = sb_dqopt(sb)->info + type; if (ii->i_fieldmask & QC_FLAGS) { if ((ii->i_flags & QCI_ROOT_SQUASH && mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) return -EINVAL; } spin_lock(&dq_data_lock); if (ii->i_fieldmask & QC_SPC_TIMER) mi->dqi_bgrace = ii->i_spc_timelimit; if (ii->i_fieldmask & QC_INO_TIMER) mi->dqi_igrace = ii->i_ino_timelimit; if (ii->i_fieldmask & QC_FLAGS) { if (ii->i_flags & QCI_ROOT_SQUASH) mi->dqi_flags |= DQF_ROOT_SQUASH; else mi->dqi_flags &= ~DQF_ROOT_SQUASH; } spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ return sb->dq_op->write_info(sb, type); } EXPORT_SYMBOL(dquot_set_dqinfo); const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; s64 value = percpu_counter_sum(&dqstats.counter[type]); /* Filter negative values for non-monotonic counters */ if (value < 0 && (type == DQST_ALLOC_DQUOTS || type == DQST_FREE_DQUOTS)) value = 0; /* Update global table */ dqstats.stat[type] = value; return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { .procname = "warnings", .data = &flag_print_warnings, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif }; static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); register_sysctl_init("fs/quota", fs_dqstats_table); dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_PANIC), NULL); order = 0; dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); ret = percpu_counter_init_many(dqstats.counter, 0, GFP_KERNEL, _DQST_DQSTAT_LAST); if (ret) panic("Cannot create dquot stat counters"); /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = ilog2(nr_hash); nr_hash = 1UL << dq_hash_bits; dq_hash_mask = nr_hash - 1; for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); if (!dqcache_shrinker) panic("Cannot allocate dquot shrinker"); dqcache_shrinker->count_objects = dqcache_shrink_count; dqcache_shrinker->scan_objects = dqcache_shrink_scan; shrinker_register(dqcache_shrinker); return 0; } fs_initcall(dquot_init);
24 24 141 141 33366 33366 8962 30992 31002 12 12 31005 31170 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMZONE_H #define _LINUX_MMZONE_H #ifndef __ASSEMBLY__ #ifndef __GENERATING_BOUNDS_H #include <linux/spinlock.h> #include <linux/list.h> #include <linux/list_nulls.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/numa.h> #include <linux/init.h> #include <linux/seqlock.h> #include <linux/nodemask.h> #include <linux/pageblock-flags.h> #include <linux/page-flags-layout.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/local_lock.h> #include <linux/zswap.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_PAGE_ORDER 10 #else #define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should * coalesce naturally under reasonable reclaim pressure and those which * will not. */ #define PAGE_ALLOC_COSTLY_ORDER 3 enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way * ZONE_MOVABLE works. Only movable pages can be allocated * from MIGRATE_CMA pageblocks and page allocator never * implicitly change migration type of MIGRATE_CMA pageblock. * * The way to use it is to change migratetype of a range of * pageblocks to MIGRATE_CMA which can be done by * __free_pageblock_cma() function. */ MIGRATE_CMA, #endif #ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ #endif MIGRATE_TYPES }; /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) # define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false # define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) { return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; } /* * Check whether a migratetype can be merged with another migratetype. * * It is only mergeable when it can fall back to other migratetypes for * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. */ static inline bool migratetype_is_mergeable(int mt) { return mt < MIGRATE_PCPTYPES; } #define for_each_migratetype_order(order, type) \ for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) #define get_pageblock_migratetype(page) \ get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) #define folio_migratetype(folio) \ get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ MIGRATETYPE_MASK) struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; }; struct pglist_data; #ifdef CONFIG_NUMA enum numa_stat_item { NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ NUMA_FOREIGN, /* was intended here, hit elsewhere */ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ NR_VM_NUMA_EVENT_ITEMS }; #else #define NR_VM_NUMA_EVENT_ITEMS 0 #endif enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif NR_FREE_CMA_PAGES, #ifdef CONFIG_UNACCEPTED_MEMORY NR_UNACCEPTED, #endif NR_VM_ZONE_STAT_ITEMS }; enum node_stat_item { NR_LRU_BASE, NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_FILE, WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_FILE, WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, NR_FILE_THPS, NR_FILE_PMDMAPPED, NR_ANON_THPS, NR_VMSCAN_WRITE, NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ NR_KERNEL_STACK_KB, /* measured in KiB */ #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ NR_SECONDARY_PAGETABLE, /* secondary pagetables, KVM & IOMMU */ #ifdef CONFIG_IOMMU_SUPPORT NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */ #endif #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif NR_VM_NODE_STAT_ITEMS }; /* * Returns true if the item should be printed in THPs (/proc/vmstat * currently prints number of anon, file and shmem THPs. But the item * is charged in pages). */ static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return false; return item == NR_ANON_THPS || item == NR_FILE_THPS || item == NR_SHMEM_THPS || item == NR_SHMEM_PMDMAPPED || item == NR_FILE_PMDMAPPED; } /* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). This defines the API part, the internal representation * might be different. */ static __always_inline bool vmstat_item_in_bytes(int idx) { /* * Global and per-node slab counters track slab pages. * It's expected that changes are multiples of PAGE_SIZE. * Internally values are stored in pages. * * Per-memcg and per-lruvec counters track memory, consumed * by individual slab objects. These counters are actually * byte-precise. */ return (idx == NR_SLAB_RECLAIMABLE_B || idx == NR_SLAB_UNRECLAIMABLE_B); } /* * We do arithmetic on the LRU lists in various places in the code, * so it is important to keep the active lists LRU_ACTIVE higher in * the array than the corresponding inactive lists, and to keep * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. * * This has to be kept in sync with the statistics in zone_stat_item * above and the descriptions in vmstat_text in mm/vmstat.c */ #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, LRU_UNEVICTABLE, NR_LRU_LISTS }; enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) static inline bool is_file_lru(enum lru_list lru) { return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); } static inline bool is_active_lru(enum lru_list lru) { return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } #define WORKINGSET_ANON 0 #define WORKINGSET_FILE 1 #define ANON_AND_FILE 2 enum lruvec_flags { /* * An lruvec has many dirty pages backed by a congested BDI: * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim. * It can be cleared by cgroup reclaim or kswapd. * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim. * It can only be cleared by kswapd. * * Essentially, kswapd can unthrottle an lruvec throttled by cgroup * reclaim, but not vice versa. This only applies to the root cgroup. * The goal is to prevent cgroup reclaim on the root cgroup (e.g. * memory.reclaim) to unthrottle an unbalanced node (that was throttled * by kswapd). */ LRUVEC_CGROUP_CONGESTED, LRUVEC_NODE_CONGESTED, }; #endif /* !__GENERATING_BOUNDS_H */ /* * Evictable pages are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while * a page is on one of lrugen->folios[]. Otherwise it stores 0. * * A page is added to the youngest generation on faulting. The aging needs to * check the accessed bit at least twice before handing this page over to the * eviction. The first check takes care of the accessed bit set on the initial * fault; the second check makes sure this page hasn't been used since then. * This process, AKA second chance, requires a minimum of two generations, * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive * LRU, e.g., /proc/vmstat, these two generations are considered active; the * rest of generations, if they exist, are considered inactive. See * lru_gen_is_active(). * * PG_active is always cleared while a page is on one of lrugen->folios[] so * that the aging needs not to worry about it. And it's set again when a page * considered active is isolated for non-reclaiming purposes, e.g., migration. * See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits * in folio->flags. */ #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U /* * Each generation is divided into multiple tiers. A page accessed N times * through file descriptors is in tier order_base_2(N). A page in the first tier * (N=0,1) is marked by PG_referenced unless it was faulted in through page * tables or read ahead. A page in any other tier (N>1) is marked by * PG_referenced and PG_workingset. This implies a minimum of two tiers is * supported without using additional bits in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, * comparisons of refaulted/(evicted+protected) from the first tier and the * rest infer whether pages accessed multiple times through file descriptors * are statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in * folio->flags. */ #define MAX_NR_TIERS 4U #ifndef __GENERATING_BOUNDS_H struct lruvec; struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) #ifdef CONFIG_LRU_GEN enum { LRU_GEN_ANON, LRU_GEN_FILE, }; enum { LRU_GEN_CORE, LRU_GEN_MM_WALK, LRU_GEN_NONLEAF_YOUNG, NR_LRU_GEN_CAPS }; #define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) /* whether to keep historical stats from evicted generations */ #ifdef CONFIG_LRU_GEN_STATS #define NR_HIST_GENS MAX_NR_GENS #else #define NR_HIST_GENS 1U #endif /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are * stored in min_seq[] separately for anon and file types as clean file pages * can be evicted regardless of swap constraints. * * Normally anon and file min_seq are in sync. But if swapping is constrained, * e.g., out of swap space, file min_seq is allowed to advance and leave anon * min_seq behind. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. */ struct lru_gen_folio { /* the aging increments the youngest generation number */ unsigned long max_seq; /* the eviction increments the oldest generation numbers */ unsigned long min_seq[ANON_AND_FILE]; /* the birth time of each generation in jiffies */ unsigned long timestamps[MAX_NR_GENS]; /* the multi-gen LRU lists, lazily sorted on eviction */ struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; /* the exponential moving average of evicted+protected */ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; /* the first tier doesn't need protection, hence the minus one */ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; /* the memcg generation this lru_gen_folio belongs to */ u8 gen; /* the list segment this lru_gen_folio belongs to */ u8 seg; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_node list; }; enum { MM_LEAF_TOTAL, /* total leaf entries */ MM_LEAF_YOUNG, /* young leaf entries */ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ NR_MM_STATS }; /* double-buffering Bloom filters */ #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; /* where the last iteration ended before */ struct list_head *tail; /* Bloom filters flip after each iteration */ unsigned long *filters[NR_BLOOM_FILTERS]; /* the mm stats for debugging */ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; }; struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; /* max_seq from lru_gen_folio: can be out of date */ unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* to batch the mm stats */ int mm_stats[NR_MM_STATS]; /* total batched items */ int batched; bool can_swap; bool force_scan; }; /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins * to improve scalability. For each bin, the hlist_nulls is virtually divided * into three segments: the head, the tail and the default. * * An onlining memcg is added to the tail of a random bin in the old generation. * The eviction starts at the head of a random bin in the old generation. The * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes * the old generation, is incremented when all its bins become empty. * * There are four operations: * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; * 3. The first attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_TAIL; * 4. The second attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * * Notes: * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing * of their max_seq counters ensures the eventual fairness to all eligible * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). * 2. There are only two valid generations: old (seq) and young (seq+1). * MEMCG_NR_GENS is set to three so that when reading the generation counter * locklessly, a stale value (seq-1) does not wraparound to young. */ #define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { /* the per-node memcg generation counter */ unsigned long seq; /* each memcg has one lru_gen_folio per node */ unsigned long nr_memcgs[MEMCG_NR_GENS]; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS]; /* protects the above */ spinlock_t lock; }; void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) { } static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { return false; } static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } #endif /* CONFIG_LRU_GEN */ struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ spinlock_t lru_lock; /* * These track the cost of reclaiming one LRU - file or anon - * over the other. As the observed cost of reclaiming one LRU * increases, the reclaim scan balance tips toward the other. */ unsigned long anon_cost; unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ struct lru_gen_folio lrugen; #ifdef CONFIG_LRU_GEN_WALKS_MMU /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif #endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif struct zswap_lruvec_state zswap_lruvec_state; }; /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8) /* LRU Isolation modes. */ typedef unsigned __bitwise isolate_mode_t; enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, WMARK_PROMO, NR_WMARK }; /* * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_PCP_THP 2 #else #define NR_PCP_THP 0 #endif #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) /* * Flags used in pcp->flags field. * * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the * previous page freeing. To avoid to drain PCP for an accident * high-order page freeing. * * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before * draining PCP for consecutive high-order pages freeing without * allocation if data cache slice of CPU is large enough. To reduce * zone lock contention and keep cache-hot pages reusing. */ #define PCPF_PREV_FREE_HIGH_ORDER BIT(0) #define PCPF_FREE_HIGH_BATCH BIT(1) struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int high_min; /* min high watermark */ int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ u8 flags; /* protected by pcp->lock */ u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA u8 expire; /* When 0, remote pagesets are drained */ #endif short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; } ____cacheline_aligned_in_smp; struct per_cpu_zonestat { #ifdef CONFIG_SMP s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; s8 stat_threshold; #endif #ifdef CONFIG_NUMA /* * Low priority inaccurate counters that are only folded * on demand. Use a large type to avoid the overhead of * folding during refresh_cpu_vm_stats. */ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #endif }; struct per_cpu_nodestat { s8 stat_threshold; s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; }; #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { /* * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able * to DMA to all of the addressable memory (ZONE_NORMAL). * On architectures where this area covers the whole 32 bit address * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller * DMA addressing constraints. This distinction is important as a 32bit * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 ZONE_DMA32, #endif /* * Normal addressable memory is in ZONE_NORMAL. DMA operations can be * performed on pages in ZONE_NORMAL if the DMA devices support * transfers to all addressable memory. */ ZONE_NORMAL, #ifdef CONFIG_HIGHMEM /* * A memory area that is only addressable by the kernel through * mapping portions into its own address space. This is for example * used by i386 to allow the kernel to address the memory beyond * 900MB. The kernel will set up special mappings (page * table entries on i386) for each page that the kernel needs to * access. */ ZONE_HIGHMEM, #endif /* * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains * movable pages with few exceptional cases described below. Main use * cases for ZONE_MOVABLE are to make memory offlining/unplug more * likely to succeed, and to locally limit unmovable allocations - e.g., * to increase the number of THP/huge pages. Notable special cases are: * * 1. Pinned pages: (long-term) pinning of movable pages might * essentially turn such pages unmovable. Therefore, we do not allow * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and * faulted, they come from the right zone right away. However, it is * still possible that address space already has pages in * ZONE_MOVABLE at the time when pages are pinned (i.e. user has * touches that memory before pinning). In such case we migrate them * to a different zone. When migration fails - pinning fails. * 2. memblock allocations: kernelcore/movablecore setups might create * situations where ZONE_MOVABLE contains unmovable allocations * after boot. Memory offlining and allocations fail early. * 3. Memory holes: kernelcore/movablecore setups might create very rare * situations where ZONE_MOVABLE contains memory holes after boot, * for example, if we have sections that are only partially * populated. Memory offlining and allocations fail early. * 4. PG_hwpoison pages: while poisoned pages can be skipped during * memory offlining, such pages cannot be allocated. * 5. Unmovable PG_offline pages: in paravirtualized environments, * hotplugged memory blocks might only partially be managed by the * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The * parts not manged by the buddy are unmovable PG_offline pages. In * some cases (virtio-mem), such pages can be skipped during * memory offlining, however, cannot be moved/allocated. These * techniques might use alloc_contig_range() to hide previously * exposed pages from the buddy again (e.g., to implement some sort * of memory unplug in virtio-mem). * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create * situations where ZERO_PAGE(0) which is allocated differently * on different platforms may end up in a movable zone. ZERO_PAGE(0) * cannot be migrated. * 7. Memory-hotplug: when using memmap_on_memory and onlining the * memory to the MOVABLE zone, the vmemmap pages are also placed in * such zone. Such pages cannot be really moved around as they are * self-stored in the range, but they are treated as movable when * the range they describe is about to be offlined. * * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) * have to expect that migrating pages in ZONE_MOVABLE can fail (even * if has_unmovable_pages() states that there are no unmovable pages, * there can be false negatives). */ ZONE_MOVABLE, #ifdef CONFIG_ZONE_DEVICE ZONE_DEVICE, #endif __MAX_NR_ZONES }; #ifndef __GENERATING_BOUNDS_H #define ASYNC_AND_SYNC 2 struct zone { /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long _watermark[NR_WMARK]; unsigned long watermark_boost; unsigned long nr_reserved_highatomic; unsigned long nr_free_highatomic; /* * We don't know if the memory that we're going to allocate will be * freeable or/and it will be released eventually, so to avoid totally * wasting several GB of ram we must reserve some of the lower zone * memory (otherwise we risk to run OOM on the lower zones despite * there being tons of freeable ram on the higher zones). This array is * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl * changes. */ long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; #endif struct pglist_data *zone_pgdat; struct per_cpu_pages __percpu *per_cpu_pageset; struct per_cpu_zonestat __percpu *per_cpu_zonestats; /* * the high and batch values are copied to individual pagesets for * faster access */ int pageset_high_min; int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; /* * spanned_pages is the total pages spanned by the zone, including * holes, which is calculated as: * spanned_pages = zone_end_pfn - zone_start_pfn; * * present_pages is physical pages existing within the zone, which * is calculated as: * present_pages = spanned_pages - absent_pages(pages in holes); * * present_early_pages is present pages existing within the zone * located on memory available since early boot, excluding hotplugged * memory. * * managed_pages is present pages managed by the buddy system, which * is calculated as (reserved_pages includes pages allocated by the * bootmem allocator): * managed_pages = present_pages - reserved_pages; * * cma pages is present pages that are assigned for CMA use * (MIGRATE_CMA). * * So present_pages may be used by memory hotplug or memory power * management logic to figure out unmanaged pages by checking * (present_pages - managed_pages). And managed_pages should be used * by page allocator and vm scanner to calculate all kinds of watermarks * and thresholds. * * Locking rules: * * zone_start_pfn and spanned_pages are protected by span_seqlock. * It is a seqlock because it has to be read outside of zone->lock, * and it is done in the main allocator path. But, it is written * quite infrequently. * * The span_seq lock is declared along with zone->lock because it is * frequently read in proximity to zone->lock. It's good to * give them a chance of being in the same cacheline. * * Write access to present_pages at runtime should be protected by * mem_hotplug_begin/done(). Any reader who can't tolerant drift of * present_pages should use get_online_mems() to get a stable value. */ atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; #if defined(CONFIG_MEMORY_HOTPLUG) unsigned long present_early_pages; #endif #ifdef CONFIG_CMA unsigned long cma_pages; #endif const char *name; #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect * freepage counting problem due to racy retrieving migratetype * of pageblock. Protected by zone->lock. */ unsigned long nr_isolate_pageblock; #endif #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif int initialized; /* Write-intensive fields used from the page allocator */ CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif /* zone flags, see below */ unsigned long flags; /* Primarily protects free_area */ spinlock_t lock; /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter * drift allowing watermarks to be breached */ unsigned long percpu_drift_mark; #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* pfn where compaction free scanner should start */ unsigned long compact_cached_free_pfn; /* pfn where compaction migration scanner should start */ unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC]; unsigned long compact_init_migrate_pfn; unsigned long compact_init_free_pfn; #endif #ifdef CONFIG_COMPACTION /* * On compaction failure, 1<<compact_defer_shift compactions * are skipped before trying again. The number attempted since * last failure is tracked with compact_considered. * compact_order_failed is the minimum compaction failed order. */ unsigned int compact_considered; unsigned int compact_defer_shift; int compact_order_failed; #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* Set to true when the PG_migrate_skip bits should be cleared */ bool compact_blockskip_flush; #endif bool contiguous; CACHELINE_PADDING(_pad3_); /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. */ PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; enum zone_flags { ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks. * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; static inline unsigned long wmark_pages(const struct zone *z, enum zone_watermarks w) { return z->_watermark[w] + z->watermark_boost; } static inline unsigned long min_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_MIN); } static inline unsigned long low_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_LOW); } static inline unsigned long high_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_HIGH); } static inline unsigned long promo_wmark_pages(const struct zone *z) { return wmark_pages(z, WMARK_PROMO); } static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } static inline unsigned long zone_cma_pages(struct zone *zone) { #ifdef CONFIG_CMA return zone->cma_pages; #else return 0; #endif } static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) { return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } static inline bool zone_is_initialized(struct zone *zone) { return zone->initialized; } static inline bool zone_is_empty(struct zone *zone) { return zone->spanned_pages == 0; } #ifndef BUILD_VDSO32_64 /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) #define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) #define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures * the compiler will optimise away reference to them. */ #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) #define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF) ? \ SECTIONS_PGOFF : ZONES_PGOFF) #else #define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF) ? \ NODES_PGOFF : ZONES_PGOFF) #endif #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) { ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } static inline enum zone_type folio_zonenum(const struct folio *folio) { return page_zonenum(&folio->page); } #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different * pgmaps. Otherwise getting the pgmap of a given segment is not possible * without scanning the entire segment. This helper returns true either if * both pages are not zone device pages or both pages are zone device pages * with the same pgmap. */ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { if (is_zone_device_page(a) != is_zone_device_page(b)) return false; if (!is_zone_device_page(a)) return true; return a->pgmap == b->pgmap; } extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else static inline bool is_zone_device_page(const struct page *page) { return false; } static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { return true; } #endif static inline bool folio_is_zone_device(const struct folio *folio) { return is_zone_device_page(&folio->page); } static inline bool is_zone_movable_page(const struct page *page) { return page_zonenum(page) == ZONE_MOVABLE; } static inline bool folio_is_zone_movable(const struct folio *folio) { return folio_zonenum(folio) == ZONE_MOVABLE; } #endif /* * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ static inline bool zone_intersects(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) return false; if (start_pfn >= zone_end_pfn(zone) || start_pfn + nr_pages <= zone->zone_start_pfn) return false; return true; } /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the * queues ("queue_length >> 12") during an aging round. */ #define DEF_PRIORITY 12 /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) enum { ZONELIST_FALLBACK, /* zonelist with fallback */ #ifdef CONFIG_NUMA /* * The NUMA zonelists are doubled because we need zonelists that * restrict the allocations to a single node for __GFP_THISNODE. */ ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ #endif MAX_ZONELISTS }; /* * This struct contains information about a zone in a zonelist. It is stored * here to avoid dereferences into large structures and lookups of tables */ struct zoneref { struct zone *zone; /* Pointer to actual zone */ int zone_idx; /* zone_idx(zoneref->zone) */ }; /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are * * zonelist_zone() - Return the struct zone * for an entry in _zonerefs * zonelist_zone_idx() - Return the index of the zone for an entry * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; }; /* * The array of struct pages for flatmem. * It must be declared for SPARSEMEM as well because there are configurations * that rely on that. */ extern struct page *mem_map; #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split { spinlock_t split_queue_lock; struct list_head split_queue; unsigned long split_queue_len; }; #endif #ifdef CONFIG_MEMORY_FAILURE /* * Per NUMA node memory failure handling statistics. */ struct memory_failure_stats { /* * Number of raw pages poisoned. * Cases not accounted: memory outside kernel control, offline page, * arch-specific memory_failure (SGX), hwpoison_filter() filtered * error events, and unpoison actions from hwpoison_unpoison. */ unsigned long total; /* * Recovery results of poisoned raw pages handled by memory_failure, * in sync with mf_result. * total = ignored + failed + delayed + recovered. * total * PAGE_SIZE * #nodes = /proc/meminfo/HardwareCorrupted. */ unsigned long ignored; unsigned long failed; unsigned long delayed; unsigned long recovered; }; #endif /* * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. On UMA machines there is a single pglist_data which * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ typedef struct pglist_data { /* * node_zones contains just the zones for THIS node. Not all of the * zones may be populated, but it is the full list. It is referenced by * this node's node_zonelists as well as other node's node_zonelists. */ struct zone node_zones[MAX_NR_ZONES]; /* * node_zonelists contains references to all zones in all nodes. * Generally the first zones will be references to this node's * node_zones. */ struct zonelist node_zonelists[MAX_ZONELISTS]; int nr_zones; /* number of populated zones in this node */ #ifdef CONFIG_FLATMEM /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION struct page_ext *node_page_ext; #endif #endif #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, * node_present_pages, node_spanned_pages or nr_zones to stay constant. * Also synchronizes pgdat->first_deferred_pfn during deferred page * init. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG * or CONFIG_DEFERRED_STRUCT_PAGE_INIT. * * Nests above zone->lock and zone->span_seqlock */ spinlock_t node_size_lock; #endif unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; /* workqueues for throttling reclaim for different reasons. */ wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE]; atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ #ifdef CONFIG_MEMORY_HOTPLUG struct mutex kswapd_lock; #endif struct task_struct *kswapd; /* Protected by kswapd_lock */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; int kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available * to userspace allocations. */ unsigned long totalreserve_pages; #ifdef CONFIG_NUMA /* * node reclaim becomes active if more unmapped pages exist. */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif /* CONFIG_NUMA */ /* Write-intensive fields used by page reclaim */ CACHELINE_PADDING(_pad1_); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* * If memory initialisation on large machines is deferred then this * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split deferred_split_queue; #endif #ifdef CONFIG_NUMA_BALANCING /* start time in ms of current promote rate limit period */ unsigned int nbp_rl_start; /* number of promote candidate pages at start time of current rate limit period */ unsigned long nbp_rl_nr_cand; /* promote threshold in ms */ unsigned int nbp_threshold; /* start time in ms of current promote threshold adjustment period */ unsigned int nbp_th_start; /* * number of promote candidate pages at start time of current promote * threshold adjustment period */ unsigned long nbp_th_nr_cand; #endif /* Fields commonly accessed by the page reclaim scanner */ /* * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED. * * Use mem_cgroup_lruvec() to look up lruvecs. */ struct lruvec __lruvec; unsigned long flags; #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ struct lru_gen_mm_walk mm_walk; /* lru_gen_folio list */ struct lru_gen_memcg memcg_lru; #endif CACHELINE_PADDING(_pad2_); /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; #ifdef CONFIG_NUMA struct memory_tier __rcu *memtier; #endif #ifdef CONFIG_MEMORY_FAILURE struct memory_failure_stats mf_stats; #endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; } #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, enum zone_type highest_zoneidx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags, long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. */ enum meminit_context { MEMINIT_EARLY, MEMINIT_HOTPLUG, }; extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { #ifdef CONFIG_MEMCG return lruvec->pgdat; #else return container_of(lruvec, struct pglist_data, __lruvec); #endif } #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else static inline int local_memory_node(int node_id) { return node_id; }; #endif /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE static inline bool zone_is_zone_device(struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else static inline bool zone_is_zone_device(struct zone *zone) { return false; } #endif /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). */ static inline bool managed_zone(struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ static inline bool populated_zone(struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA static inline int zone_to_nid(struct zone *zone) { return zone->node; } static inline void zone_set_nid(struct zone *zone, int nid) { zone->node = nid; } #else static inline int zone_to_nid(struct zone *zone) { return 0; } static inline void zone_set_nid(struct zone *zone, int nid) {} #endif extern int movable_zone; static inline int is_highmem_idx(enum zone_type idx) { #ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM || (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM)); #else return 0; #endif } /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ static inline int is_highmem(struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } #ifdef CONFIG_ZONE_DMA bool has_managed_dma(void); #else static inline bool has_managed_dma(void) { return false; } #endif #ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } #else /* CONFIG_NUMA */ #include <asm/mmzone.h> #endif /* !CONFIG_NUMA */ extern struct pglist_data *first_online_pgdat(void); extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); extern struct zone *next_zone(struct zone *zone); /** * for_each_online_pgdat - helper macro to iterate over all online nodes * @pgdat: pointer to a pg_data_t variable */ #define for_each_online_pgdat(pgdat) \ for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) /** * for_each_zone - helper macro to iterate over all memory zones * @zone: pointer to struct zone variable * * The user only needs to declare the zone variable, for_each_zone * fills it in. */ #define for_each_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) #define for_each_populated_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) \ if (!populated_zone(zone)) \ ; /* do nothing */ \ else static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; } static inline int zonelist_zone_idx(struct zoneref *zoneref) { return zoneref->zone_idx; } static inline int zonelist_node_idx(struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } struct zoneref *__next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes); /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z: The cursor used as a starting point for the search * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. * * Return: the next zone at or below highest_zoneidx within the allowed * nodemask using a cursor within a zonelist as a starting point */ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes) { if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx)) return z; return __next_zones_zonelist(z, highest_zoneidx, nodes); } /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist * @zonelist: The zonelist to search for a suitable zone * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. * * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is * never NULL). This may happen either genuinely, or due to concurrent nodemask * update due to cpuset modification. * * Return: Zoneref pointer for the first suitable zone found */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, nodemask_t *nodes) { return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); } /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone: The current zone in the iterator * @z: The current pointer within zonelist->_zonerefs being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * @nodemask: Nodemask allowed by the allocator * * This iterator iterates though all zones at or below a given zone index and * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ for (zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index * @zone: The current zone in the iterator * @z: The current pointer within zonelist->zones being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * * This iterator iterates though all zones at or below a given zone index. */ #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) /* Whether the 'nodes' are all movable nodes */ static inline bool movable_only_nodes(nodemask_t *nodes) { struct zonelist *zonelist; struct zoneref *z; int nid; if (nodes_empty(*nodes)) return false; /* * We can chose arbitrary node from the nodemask to get a * zonelist as they are interlinked. We just need to find * at least one zone that can satisfy kernel allocations. */ nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); return (!zonelist_zone(z)) ? true : false; } #ifdef CONFIG_SPARSEMEM #include <asm/sparsemem.h> #endif #ifdef CONFIG_FLATMEM #define pfn_to_nid(pfn) (0) #endif #ifdef CONFIG_SPARSEMEM /* * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) #if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) { return pfn >> PFN_SECTION_SHIFT; } static inline unsigned long section_nr_to_pfn(unsigned long sec) { return sec << PFN_SECTION_SHIFT; } #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) #define SUBSECTION_SHIFT 21 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) #endif #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ unsigned long pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); struct page; struct page_ext; struct mem_section { /* * This is, logically, a pointer to an array of struct * pages. However, it is stored with some other magic. * (see sparse.c::sparse_init_one_section()) * * Additionally during early boot we encode node id of * the location of the section here to guide allocation. * (see sparse.c::memory_present()) * * Making it a UL at least makes someone do a cast * before using it wrong. */ unsigned long section_mem_map; struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use * section. (see page_ext.h about this.) */ struct page_ext *page_ext; unsigned long pad; #endif /* * WARNING: mem_section must be a power-of-2 in size for the * calculation and use of SECTION_ROOT_MASK to make sense. */ }; #ifdef CONFIG_SPARSEMEM_EXTREME #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) #else #define SECTIONS_PER_ROOT 1 #endif #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif static inline unsigned long *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } static inline struct mem_section *__nr_to_section(unsigned long nr) { unsigned long root = SECTION_NR_TO_ROOT(nr); if (unlikely(root >= NR_SECTION_ROOTS)) return NULL; #ifdef CONFIG_SPARSEMEM_EXTREME if (!mem_section || !mem_section[root]) return NULL; #endif return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store * a little bit of information. The pointer is calculated * as mem_map - section_nr_to_pfn(pnum). The result is * aligned to the minimum alignment of the two values: * 1. All mem_map arrays are page-aligned. * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT * lowest bits. PFN_SECTION_SHIFT is arch-specific * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the * worst combination is powerpc with 256k pages, * which results in PFN_SECTION_SHIFT equal 6. * To sum it up, at least 6 bits are available on all architectures. * However, we can exceed 6 bits on some other architectures except * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available * with the worst case of 64K pages on arm64) if we make sure the * exceeded bit is not applicable to powerpc. */ enum { SECTION_MARKED_PRESENT_BIT, SECTION_HAS_MEM_MAP_BIT, SECTION_IS_ONLINE_BIT, SECTION_IS_EARLY_BIT, #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif SECTION_MAP_LAST_BIT, }; #define SECTION_MARKED_PRESENT BIT(SECTION_MARKED_PRESENT_BIT) #define SECTION_HAS_MEM_MAP BIT(SECTION_HAS_MEM_MAP_BIT) #define SECTION_IS_ONLINE BIT(SECTION_IS_ONLINE_BIT) #define SECTION_IS_EARLY BIT(SECTION_IS_EARLY_BIT) #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT static inline struct page *__section_mem_map_addr(struct mem_section *section) { unsigned long map = section->section_mem_map; map &= SECTION_MAP_MASK; return (struct page *)map; } static inline int present_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int present_section_nr(unsigned long nr) { return present_section(__nr_to_section(nr)); } static inline int valid_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int early_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); } static inline int valid_section_nr(unsigned long nr) { return valid_section(__nr_to_section(nr)); } static inline int online_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE static inline int online_device_section(struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else static inline int online_device_section(struct mem_section *section) { return 0; } #endif static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); } #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); } extern unsigned long __highest_present_section_nr; static inline int subsection_map_index(unsigned long pfn) { return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; } #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); struct mem_section_usage *usage = READ_ONCE(ms->usage); return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } #endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN * @pfn: the page frame number to check * * Check if there is a valid memory map entry aka struct page for the @pfn. * Note, that availability of the memory map entry does not imply that * there is actual usable memory at that @pfn. The struct page may * represent a hole or an unusable page frame. * * Return: 1 for PFNs that have memory map entries and 0 otherwise */ static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the * pfn. Else it might lead to false positives when * some of the upper bits are set, but the lower bits * match a valid pfn. */ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) return 0; if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); rcu_read_lock_sched(); if (!valid_section(ms)) { rcu_read_unlock_sched(); return 0; } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); rcu_read_unlock_sched(); return ret; } #endif static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; return present_section(__pfn_to_section(pfn)); } static inline unsigned long next_present_section_nr(unsigned long section_nr) { while (++section_nr <= __highest_present_section_nr) { if (present_section_nr(section_nr)) return section_nr; } return -1; } /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate * this restriction. */ #ifdef CONFIG_NUMA #define pfn_to_nid(pfn) \ ({ \ unsigned long __pfn_to_nid_pfn = (pfn); \ page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \ }) #else #define pfn_to_nid(pfn) (0) #endif void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */
8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2014 Linaro Ltd * * Author: Ulf Hansson <ulf.hansson@linaro.org> * * MMC power sequence management */ #include <linux/kernel.h> #include <linux/err.h> #include <linux/module.h> #include <linux/of.h> #include <linux/mmc/host.h> #include "pwrseq.h" static DEFINE_MUTEX(pwrseq_list_mutex); static LIST_HEAD(pwrseq_list); int mmc_pwrseq_alloc(struct mmc_host *host) { struct device_node *np; struct mmc_pwrseq *p; np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0); if (!np) return 0; mutex_lock(&pwrseq_list_mutex); list_for_each_entry(p, &pwrseq_list, pwrseq_node) { if (device_match_of_node(p->dev, np)) { if (!try_module_get(p->owner)) dev_err(host->parent, "increasing module refcount failed\n"); else host->pwrseq = p; break; } } of_node_put(np); mutex_unlock(&pwrseq_list_mutex); if (!host->pwrseq) return -EPROBE_DEFER; dev_info(host->parent, "allocated mmc-pwrseq\n"); return 0; } void mmc_pwrseq_pre_power_on(struct mmc_host *host) { struct mmc_pwrseq *pwrseq = host->pwrseq; if (pwrseq && pwrseq->ops->pre_power_on) pwrseq->ops->pre_power_on(host); } void mmc_pwrseq_post_power_on(struct mmc_host *host) { struct mmc_pwrseq *pwrseq = host->pwrseq; if (pwrseq && pwrseq->ops->post_power_on) pwrseq->ops->post_power_on(host); } void mmc_pwrseq_power_off(struct mmc_host *host) { struct mmc_pwrseq *pwrseq = host->pwrseq; if (pwrseq && pwrseq->ops->power_off) pwrseq->ops->power_off(host); } void mmc_pwrseq_reset(struct mmc_host *host) { struct mmc_pwrseq *pwrseq = host->pwrseq; if (pwrseq && pwrseq->ops->reset) pwrseq->ops->reset(host); } void mmc_pwrseq_free(struct mmc_host *host) { struct mmc_pwrseq *pwrseq = host->pwrseq; if (pwrseq) { module_put(pwrseq->owner); host->pwrseq = NULL; } } int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq) { if (!pwrseq || !pwrseq->ops || !pwrseq->dev) return -EINVAL; mutex_lock(&pwrseq_list_mutex); list_add(&pwrseq->pwrseq_node, &pwrseq_list); mutex_unlock(&pwrseq_list_mutex); return 0; } EXPORT_SYMBOL_GPL(mmc_pwrseq_register); void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq) { if (pwrseq) { mutex_lock(&pwrseq_list_mutex); list_del(&pwrseq->pwrseq_node); mutex_unlock(&pwrseq_list_mutex); } } EXPORT_SYMBOL_GPL(mmc_pwrseq_unregister);
32 94 85 116 61 7 182 8 169 3 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tcp #if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TCP_H #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/tracepoint.h> #include <net/ipv6.h> #include <net/tcp.h> #include <linux/sock_diag.h> #include <net/rstreason.h> /* * tcp event with arguments sk and skb * * Note: this class requires a valid sk pointer; while skb pointer could * be NULL. */ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); #undef FN #define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason); DEFINE_RST_REASON(FN, FN) #undef FN #undef FNe #define FN(reason) { SK_RST_REASON_##reason, #reason }, #define FNe(reason) { SK_RST_REASON_##reason, #reason } /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(enum sk_rst_reason, reason) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); if (sk && sk_fullsock(sk)) { const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); } else if (skb__nullable) { const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s", __entry->skbaddr, __entry->skaddr, __entry->saddr, __entry->daddr, __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN", __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe))) ); #undef FN #undef FNe /* * tcp event with arguments sk * * Note: this class requires a valid sk pointer. */ DECLARE_EVENT_CLASS(tcp_event_sk, TP_PROTO(struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), TP_ARGS(sk, req), TP_STRUCT__entry( __field(const void *, skaddr) __field(const void *, req) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); __be32 *p32; __entry->skaddr = sk; __entry->req = req; __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, TP_PROTO(struct sock *sk, struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) __field(__u32, ssthresh) __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; __entry->family = sk->sk_family; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); __entry->skbaddr = skb; __entry->skaddr = sk; ), TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p", show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->skbaddr, __entry->skaddr) ); /* * tcp event with only skb */ DECLARE_EVENT_CLASS(tcp_event_skb, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field(const void *, skbaddr) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->skbaddr = skb; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); TRACE_EVENT(tcp_cong_state_set, TP_PROTO(struct sock *sk, const u8 ca_state), TP_ARGS(sk, ca_state), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u8, cong_state) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->cong_state = ca_state; ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->cong_state) ); DECLARE_EVENT_CLASS(tcp_hash_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' : ' ') ); DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DECLARE_EVENT_CLASS(tcp_ao_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) __field(__u8, keyid) __field(__u8, rnext) __field(__u8, maclen) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; __entry->keyid = keyid; __entry->rnext = rnext; __entry->maclen = maclen; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' : ' ', __entry->keyid, __entry->rnext, __entry->maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DECLARE_EVENT_CLASS(tcp_ao_event_sk, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u8, keyid) __field(__u8, rnext) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->keyid = keyid; __entry->rnext = rnext; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->keyid, __entry->rnext) ); DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext) ); DECLARE_EVENT_CLASS(tcp_ao_event_sne, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, new_sne) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->new_sne = new_sne; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM v4l2 #if !defined(_TRACE_V4L2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_V4L2_H #include <linux/tracepoint.h> #include <media/videobuf2-v4l2.h> /* Enums require being exported to userspace, for user tool parsing */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); #define show_type(type) \ __print_symbolic(type, SHOW_TYPE) #define SHOW_TYPE \ EM( V4L2_BUF_TYPE_VIDEO_CAPTURE, "VIDEO_CAPTURE" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT, "VIDEO_OUTPUT" ) \ EM( V4L2_BUF_TYPE_VIDEO_OVERLAY, "VIDEO_OVERLAY" ) \ EM( V4L2_BUF_TYPE_VBI_CAPTURE, "VBI_CAPTURE" ) \ EM( V4L2_BUF_TYPE_VBI_OUTPUT, "VBI_OUTPUT" ) \ EM( V4L2_BUF_TYPE_SLICED_VBI_CAPTURE, "SLICED_VBI_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SLICED_VBI_OUTPUT, "SLICED_VBI_OUTPUT" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" ) \ EM( V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" ) \ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" ) \ EM( V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SDR_OUTPUT, "SDR_OUTPUT" ) \ EM( V4L2_BUF_TYPE_META_CAPTURE, "META_CAPTURE" ) \ EMe(V4L2_BUF_TYPE_PRIVATE, "PRIVATE" ) SHOW_TYPE #define show_field(field) \ __print_symbolic(field, SHOW_FIELD) #define SHOW_FIELD \ EM( V4L2_FIELD_ANY, "ANY" ) \ EM( V4L2_FIELD_NONE, "NONE" ) \ EM( V4L2_FIELD_TOP, "TOP" ) \ EM( V4L2_FIELD_BOTTOM, "BOTTOM" ) \ EM( V4L2_FIELD_INTERLACED, "INTERLACED" ) \ EM( V4L2_FIELD_SEQ_TB, "SEQ_TB" ) \ EM( V4L2_FIELD_SEQ_BT, "SEQ_BT" ) \ EM( V4L2_FIELD_ALTERNATE, "ALTERNATE" ) \ EM( V4L2_FIELD_INTERLACED_TB, "INTERLACED_TB" ) \ EMe( V4L2_FIELD_INTERLACED_BT, "INTERLACED_BT" ) SHOW_FIELD /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a, b) {a, b}, #define EMe(a, b) {a, b} /* V4L2_TC_TYPE_* are macros, not defines, they do not need processing */ #define show_timecode_type(type) \ __print_symbolic(type, \ { V4L2_TC_TYPE_24FPS, "24FPS" }, \ { V4L2_TC_TYPE_25FPS, "25FPS" }, \ { V4L2_TC_TYPE_30FPS, "30FPS" }, \ { V4L2_TC_TYPE_50FPS, "50FPS" }, \ { V4L2_TC_TYPE_60FPS, "60FPS" }) #define show_flags(flags) \ __print_flags(flags, "|", \ { V4L2_BUF_FLAG_MAPPED, "MAPPED" }, \ { V4L2_BUF_FLAG_QUEUED, "QUEUED" }, \ { V4L2_BUF_FLAG_DONE, "DONE" }, \ { V4L2_BUF_FLAG_KEYFRAME, "KEYFRAME" }, \ { V4L2_BUF_FLAG_PFRAME, "PFRAME" }, \ { V4L2_BUF_FLAG_BFRAME, "BFRAME" }, \ { V4L2_BUF_FLAG_ERROR, "ERROR" }, \ { V4L2_BUF_FLAG_TIMECODE, "TIMECODE" }, \ { V4L2_BUF_FLAG_PREPARED, "PREPARED" }, \ { V4L2_BUF_FLAG_NO_CACHE_INVALIDATE, "NO_CACHE_INVALIDATE" }, \ { V4L2_BUF_FLAG_NO_CACHE_CLEAN, "NO_CACHE_CLEAN" }, \ { V4L2_BUF_FLAG_TIMESTAMP_MASK, "TIMESTAMP_MASK" }, \ { V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN, "TIMESTAMP_UNKNOWN" }, \ { V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \ { V4L2_BUF_FLAG_TIMESTAMP_COPY, "TIMESTAMP_COPY" }, \ { V4L2_BUF_FLAG_LAST, "LAST" }) #define show_timecode_flags(flags) \ __print_flags(flags, "|", \ { V4L2_TC_FLAG_DROPFRAME, "DROPFRAME" }, \ { V4L2_TC_FLAG_COLORFRAME, "COLORFRAME" }, \ { V4L2_TC_USERBITS_USERDEFINED, "USERBITS_USERDEFINED" }, \ { V4L2_TC_USERBITS_8BITCHARS, "USERBITS_8BITCHARS" }) DECLARE_EVENT_CLASS(v4l2_event_class, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf), TP_STRUCT__entry( __field(int, minor) __field(u32, index) __field(u32, type) __field(u32, bytesused) __field(u32, flags) __field(u32, field) __field(s64, timestamp) __field(u32, timecode_type) __field(u32, timecode_flags) __field(u8, timecode_frames) __field(u8, timecode_seconds) __field(u8, timecode_minutes) __field(u8, timecode_hours) __field(u8, timecode_userbits0) __field(u8, timecode_userbits1) __field(u8, timecode_userbits2) __field(u8, timecode_userbits3) __field(u32, sequence) ), TP_fast_assign( __entry->minor = minor; __entry->index = buf->index; __entry->type = buf->type; __entry->bytesused = buf->bytesused; __entry->flags = buf->flags; __entry->field = buf->field; __entry->timestamp = v4l2_buffer_get_timestamp(buf); __entry->timecode_type = buf->timecode.type; __entry->timecode_flags = buf->timecode.flags; __entry->timecode_frames = buf->timecode.frames; __entry->timecode_seconds = buf->timecode.seconds; __entry->timecode_minutes = buf->timecode.minutes; __entry->timecode_hours = buf->timecode.hours; __entry->timecode_userbits0 = buf->timecode.userbits[0]; __entry->timecode_userbits1 = buf->timecode.userbits[1]; __entry->timecode_userbits2 = buf->timecode.userbits[2]; __entry->timecode_userbits3 = buf->timecode.userbits[3]; __entry->sequence = buf->sequence; ), TP_printk("minor = %d, index = %u, type = %s, bytesused = %u, " "flags = %s, field = %s, timestamp = %llu, " "timecode = { type = %s, flags = %s, frames = %u, " "seconds = %u, minutes = %u, hours = %u, " "userbits = { %u %u %u %u } }, sequence = %u", __entry->minor, __entry->index, show_type(__entry->type), __entry->bytesused, show_flags(__entry->flags), show_field(__entry->field), __entry->timestamp, show_timecode_type(__entry->timecode_type), show_timecode_flags(__entry->timecode_flags), __entry->timecode_frames, __entry->timecode_seconds, __entry->timecode_minutes, __entry->timecode_hours, __entry->timecode_userbits0, __entry->timecode_userbits1, __entry->timecode_userbits2, __entry->timecode_userbits3, __entry->sequence ) ) DEFINE_EVENT(v4l2_event_class, v4l2_dqbuf, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf) ); DEFINE_EVENT(v4l2_event_class, v4l2_qbuf, TP_PROTO(int minor, struct v4l2_buffer *buf), TP_ARGS(minor, buf) ); DECLARE_EVENT_CLASS(vb2_v4l2_event_class, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb), TP_STRUCT__entry( __field(int, minor) __field(u32, flags) __field(u32, field) __field(u64, timestamp) __field(u32, timecode_type) __field(u32, timecode_flags) __field(u8, timecode_frames) __field(u8, timecode_seconds) __field(u8, timecode_minutes) __field(u8, timecode_hours) __field(u8, timecode_userbits0) __field(u8, timecode_userbits1) __field(u8, timecode_userbits2) __field(u8, timecode_userbits3) __field(u32, sequence) ), TP_fast_assign( struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct v4l2_fh *owner = q->owner; __entry->minor = owner ? owner->vdev->minor : -1; __entry->flags = vbuf->flags; __entry->field = vbuf->field; __entry->timestamp = vb->timestamp; __entry->timecode_type = vbuf->timecode.type; __entry->timecode_flags = vbuf->timecode.flags; __entry->timecode_frames = vbuf->timecode.frames; __entry->timecode_seconds = vbuf->timecode.seconds; __entry->timecode_minutes = vbuf->timecode.minutes; __entry->timecode_hours = vbuf->timecode.hours; __entry->timecode_userbits0 = vbuf->timecode.userbits[0]; __entry->timecode_userbits1 = vbuf->timecode.userbits[1]; __entry->timecode_userbits2 = vbuf->timecode.userbits[2]; __entry->timecode_userbits3 = vbuf->timecode.userbits[3]; __entry->sequence = vbuf->sequence; ), TP_printk("minor=%d flags = %s, field = %s, " "timestamp = %llu, timecode = { type = %s, flags = %s, " "frames = %u, seconds = %u, minutes = %u, hours = %u, " "userbits = { %u %u %u %u } }, sequence = %u", __entry->minor, show_flags(__entry->flags), show_field(__entry->field), __entry->timestamp, show_timecode_type(__entry->timecode_type), show_timecode_flags(__entry->timecode_flags), __entry->timecode_frames, __entry->timecode_seconds, __entry->timecode_minutes, __entry->timecode_hours, __entry->timecode_userbits0, __entry->timecode_userbits1, __entry->timecode_userbits2, __entry->timecode_userbits3, __entry->sequence ) ) DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_buf_done, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_buf_queue, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_dqbuf, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); DEFINE_EVENT(vb2_v4l2_event_class, vb2_v4l2_qbuf, TP_PROTO(struct vb2_queue *q, struct vb2_buffer *vb), TP_ARGS(q, vb) ); #endif /* if !defined(_TRACE_V4L2_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ #include <trace/define_trace.h>
3 2 1 8 2 6 2 6 6 5 1 10 2 7 7 1 1 3 3 5 1 1 2 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 // SPDX-License-Identifier: GPL-2.0 /* * Driver for Meywa-Denki & KAYAC YUREX * * Copyright (C) 2010 Tomoki Sekiyama (tomoki.sekiyama@gmail.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/hid.h> #define DRIVER_AUTHOR "Tomoki Sekiyama" #define DRIVER_DESC "Driver for Meywa-Denki & KAYAC YUREX" #define YUREX_VENDOR_ID 0x0c45 #define YUREX_PRODUCT_ID 0x1010 #define CMD_ACK '!' #define CMD_ANIMATE 'A' #define CMD_COUNT 'C' #define CMD_LED 'L' #define CMD_READ 'R' #define CMD_SET 'S' #define CMD_VERSION 'V' #define CMD_EOF 0x0d #define CMD_PADDING 0xff #define YUREX_BUF_SIZE 8 #define YUREX_WRITE_TIMEOUT (HZ*2) /* table of devices that work with this driver */ static struct usb_device_id yurex_table[] = { { USB_DEVICE(YUREX_VENDOR_ID, YUREX_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, yurex_table); #ifdef CONFIG_USB_DYNAMIC_MINORS #define YUREX_MINOR_BASE 0 #else #define YUREX_MINOR_BASE 192 #endif /* Structure to hold all of our device specific stuff */ struct usb_yurex { struct usb_device *udev; struct usb_interface *interface; __u8 int_in_endpointAddr; struct urb *urb; /* URB for interrupt in */ unsigned char *int_buffer; /* buffer for intterupt in */ struct urb *cntl_urb; /* URB for control msg */ struct usb_ctrlrequest *cntl_req; /* req for control msg */ unsigned char *cntl_buffer; /* buffer for control msg */ struct kref kref; struct mutex io_mutex; unsigned long disconnected:1; struct fasync_struct *async_queue; wait_queue_head_t waitq; spinlock_t lock; __s64 bbu; /* BBU from device */ }; #define to_yurex_dev(d) container_of(d, struct usb_yurex, kref) static struct usb_driver yurex_driver; static const struct file_operations yurex_fops; static void yurex_control_callback(struct urb *urb) { struct usb_yurex *dev = urb->context; int status = urb->status; if (status) { dev_err(&urb->dev->dev, "%s - control failed: %d\n", __func__, status); wake_up_interruptible(&dev->waitq); return; } /* on success, sender woken up by CMD_ACK int in, or timeout */ } static void yurex_delete(struct kref *kref) { struct usb_yurex *dev = to_yurex_dev(kref); dev_dbg(&dev->interface->dev, "%s\n", __func__); if (dev->cntl_urb) { usb_kill_urb(dev->cntl_urb); kfree(dev->cntl_req); usb_free_coherent(dev->udev, YUREX_BUF_SIZE, dev->cntl_buffer, dev->cntl_urb->transfer_dma); usb_free_urb(dev->cntl_urb); } if (dev->urb) { usb_kill_urb(dev->urb); usb_free_coherent(dev->udev, YUREX_BUF_SIZE, dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } usb_put_intf(dev->interface); usb_put_dev(dev->udev); kfree(dev); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver yurex_class = { .name = "yurex%d", .fops = &yurex_fops, .minor_base = YUREX_MINOR_BASE, }; static void yurex_interrupt(struct urb *urb) { struct usb_yurex *dev = urb->context; unsigned char *buf = dev->int_buffer; int status = urb->status; unsigned long flags; int retval, i; switch (status) { case 0: /*success*/ break; /* The device is terminated or messed up, give up */ case -EOVERFLOW: dev_err(&dev->interface->dev, "%s - overflow with length %d, actual length is %d\n", __func__, YUREX_BUF_SIZE, dev->urb->actual_length); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EILSEQ: case -EPROTO: case -ETIME: return; default: dev_err(&dev->interface->dev, "%s - unknown status received: %d\n", __func__, status); return; } /* handle received message */ switch (buf[0]) { case CMD_COUNT: case CMD_READ: if (buf[6] == CMD_EOF) { spin_lock_irqsave(&dev->lock, flags); dev->bbu = 0; for (i = 1; i < 6; i++) { dev->bbu += buf[i]; if (i != 5) dev->bbu <<= 8; } dev_dbg(&dev->interface->dev, "%s count: %lld\n", __func__, dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); kill_fasync(&dev->async_queue, SIGIO, POLL_IN); } else dev_dbg(&dev->interface->dev, "data format error - no EOF\n"); break; case CMD_ACK: dev_dbg(&dev->interface->dev, "%s ack: %c\n", __func__, buf[1]); wake_up_interruptible(&dev->waitq); break; } retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) { dev_err(&dev->interface->dev, "%s - usb_submit_urb failed: %d\n", __func__, retval); } } static int yurex_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_yurex *dev; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int retval = -ENOMEM; DEFINE_WAIT(wait); int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto error; kref_init(&dev->kref); mutex_init(&dev->io_mutex); spin_lock_init(&dev->lock); init_waitqueue_head(&dev->waitq); dev->udev = usb_get_dev(interface_to_usbdev(interface)); dev->interface = usb_get_intf(interface); /* set up the endpoint information */ iface_desc = interface->cur_altsetting; res = usb_find_int_in_endpoint(iface_desc, &endpoint); if (res) { dev_err(&interface->dev, "Could not find endpoints\n"); retval = res; goto error; } dev->int_in_endpointAddr = endpoint->bEndpointAddress; /* allocate control URB */ dev->cntl_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->cntl_urb) goto error; /* allocate buffer for control req */ dev->cntl_req = kmalloc(YUREX_BUF_SIZE, GFP_KERNEL); if (!dev->cntl_req) goto error; /* allocate buffer for control msg */ dev->cntl_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE, GFP_KERNEL, &dev->cntl_urb->transfer_dma); if (!dev->cntl_buffer) { dev_err(&interface->dev, "Could not allocate cntl_buffer\n"); goto error; } /* configure control URB */ dev->cntl_req->bRequestType = USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; dev->cntl_req->bRequest = HID_REQ_SET_REPORT; dev->cntl_req->wValue = cpu_to_le16((HID_OUTPUT_REPORT + 1) << 8); dev->cntl_req->wIndex = cpu_to_le16(iface_desc->desc.bInterfaceNumber); dev->cntl_req->wLength = cpu_to_le16(YUREX_BUF_SIZE); usb_fill_control_urb(dev->cntl_urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), (void *)dev->cntl_req, dev->cntl_buffer, YUREX_BUF_SIZE, yurex_control_callback, dev); dev->cntl_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* allocate interrupt URB */ dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto error; /* allocate buffer for interrupt in */ dev->int_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE, GFP_KERNEL, &dev->urb->transfer_dma); if (!dev->int_buffer) { dev_err(&interface->dev, "Could not allocate int_buffer\n"); goto error; } /* configure interrupt URB */ usb_fill_int_urb(dev->urb, dev->udev, usb_rcvintpipe(dev->udev, dev->int_in_endpointAddr), dev->int_buffer, YUREX_BUF_SIZE, yurex_interrupt, dev, 1); dev->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (usb_submit_urb(dev->urb, GFP_KERNEL)) { retval = -EIO; dev_err(&interface->dev, "Could not submitting URB\n"); goto error; } /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); dev->bbu = -1; /* we can register the device now, as it is ready */ retval = usb_register_dev(interface, &yurex_class); if (retval) { dev_err(&interface->dev, "Not able to get a minor for this device.\n"); usb_set_intfdata(interface, NULL); goto error; } dev_info(&interface->dev, "USB YUREX device now attached to Yurex #%d\n", interface->minor); return 0; error: if (dev) /* this frees allocated memory */ kref_put(&dev->kref, yurex_delete); return retval; } static void yurex_disconnect(struct usb_interface *interface) { struct usb_yurex *dev; int minor = interface->minor; dev = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); /* give back our minor */ usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ usb_poison_urb(dev->urb); usb_poison_urb(dev->cntl_urb); mutex_lock(&dev->io_mutex); dev->disconnected = 1; mutex_unlock(&dev->io_mutex); /* wakeup waiters */ kill_fasync(&dev->async_queue, SIGIO, POLL_IN); wake_up_interruptible(&dev->waitq); /* decrement our usage count */ kref_put(&dev->kref, yurex_delete); dev_info(&interface->dev, "USB YUREX #%d now disconnected\n", minor); } static struct usb_driver yurex_driver = { .name = "yurex", .probe = yurex_probe, .disconnect = yurex_disconnect, .id_table = yurex_table, }; static int yurex_fasync(int fd, struct file *file, int on) { struct usb_yurex *dev; dev = file->private_data; return fasync_helper(fd, file, on, &dev->async_queue); } static int yurex_open(struct inode *inode, struct file *file) { struct usb_yurex *dev; struct usb_interface *interface; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&yurex_driver, subminor); if (!interface) { printk(KERN_ERR "%s - error, can't find device for minor %d", __func__, subminor); retval = -ENODEV; goto exit; } dev = usb_get_intfdata(interface); if (!dev) { retval = -ENODEV; goto exit; } /* increment our usage count for the device */ kref_get(&dev->kref); /* save our object in the file's private structure */ mutex_lock(&dev->io_mutex); file->private_data = dev; mutex_unlock(&dev->io_mutex); exit: return retval; } static int yurex_release(struct inode *inode, struct file *file) { struct usb_yurex *dev; dev = file->private_data; if (dev == NULL) return -ENODEV; /* decrement the count on our device */ kref_put(&dev->kref, yurex_delete); return 0; } static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; int len; char in_buffer[20]; unsigned long flags; dev = file->private_data; mutex_lock(&dev->io_mutex); if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); return -ENODEV; } spin_lock_irqsave(&dev->lock, flags); len = snprintf(in_buffer, 20, "%lld\n", dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->io_mutex); if (WARN_ON_ONCE(len >= sizeof(in_buffer))) return -EIO; return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } static ssize_t yurex_write(struct file *file, const char __user *user_buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; int i, set = 0, retval = 0; char buffer[16 + 1]; char *data = buffer; unsigned long long c, c2 = 0; signed long timeout = 0; DEFINE_WAIT(wait); count = min(sizeof(buffer) - 1, count); dev = file->private_data; /* verify that we actually have some data to write */ if (count == 0) goto error; retval = mutex_lock_interruptible(&dev->io_mutex); if (retval < 0) return -EINTR; if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; } if (copy_from_user(buffer, user_buffer, count)) { mutex_unlock(&dev->io_mutex); retval = -EFAULT; goto error; } buffer[count] = 0; memset(dev->cntl_buffer, CMD_PADDING, YUREX_BUF_SIZE); switch (buffer[0]) { case CMD_ANIMATE: case CMD_LED: dev->cntl_buffer[0] = buffer[0]; dev->cntl_buffer[1] = buffer[1]; dev->cntl_buffer[2] = CMD_EOF; break; case CMD_READ: case CMD_VERSION: dev->cntl_buffer[0] = buffer[0]; dev->cntl_buffer[1] = 0x00; dev->cntl_buffer[2] = CMD_EOF; break; case CMD_SET: data++; fallthrough; case '0' ... '9': set = 1; c = c2 = simple_strtoull(data, NULL, 0); dev->cntl_buffer[0] = CMD_SET; for (i = 1; i < 6; i++) { dev->cntl_buffer[i] = (c>>32) & 0xff; c <<= 8; } buffer[6] = CMD_EOF; break; default: mutex_unlock(&dev->io_mutex); return -EINVAL; } /* send the data as the control msg */ prepare_to_wait(&dev->waitq, &wait, TASK_INTERRUPTIBLE); dev_dbg(&dev->interface->dev, "%s - submit %c\n", __func__, dev->cntl_buffer[0]); retval = usb_submit_urb(dev->cntl_urb, GFP_ATOMIC); if (retval >= 0) timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); /* make sure URB is idle after timeout or (spurious) CMD_ACK */ usb_kill_urb(dev->cntl_urb); mutex_unlock(&dev->io_mutex); if (retval < 0) { dev_err(&dev->interface->dev, "%s - failed to send bulk msg, error %d\n", __func__, retval); goto error; } if (set && timeout) { spin_lock_irq(&dev->lock); dev->bbu = c2; spin_unlock_irq(&dev->lock); } return timeout ? count : -EIO; error: return retval; } static const struct file_operations yurex_fops = { .owner = THIS_MODULE, .read = yurex_read, .write = yurex_write, .open = yurex_open, .release = yurex_release, .fasync = yurex_fasync, .llseek = default_llseek, }; module_usb_driver(yurex_driver); MODULE_DESCRIPTION("USB YUREX driver support"); MODULE_LICENSE("GPL");
1 1 1 1 1 1 1 5 3 3 2 4 2 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 // SPDX-License-Identifier: GPL-2.0-or-later /* * Infrared Toy and IR Droid RC core driver * * Copyright (C) 2020 Sean Young <sean@mess.org> * * http://dangerousprototypes.com/docs/USB_IR_Toy:_Sampling_mode * * This driver is based on the lirc driver which can be found here: * https://sourceforge.net/p/lirc/git/ci/master/tree/plugins/irtoy.c * Copyright (C) 2011 Peter Kooiman <pkooiman@gmail.com> */ #include <linux/unaligned.h> #include <linux/completion.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/usb/input.h> #include <media/rc-core.h> static const u8 COMMAND_VERSION[] = { 'v' }; // End transmit and repeat reset command so we exit sump mode static const u8 COMMAND_RESET[] = { 0xff, 0xff, 0, 0, 0, 0, 0 }; static const u8 COMMAND_SMODE_ENTER[] = { 's' }; static const u8 COMMAND_SMODE_EXIT[] = { 0 }; static const u8 COMMAND_TXSTART[] = { 0x26, 0x24, 0x25, 0x03 }; #define REPLY_XMITCOUNT 't' #define REPLY_XMITSUCCESS 'C' #define REPLY_VERSION 'V' #define REPLY_SAMPLEMODEPROTO 'S' #define TIMEOUT 500 #define LEN_XMITRES 3 #define LEN_VERSION 4 #define LEN_SAMPLEMODEPROTO 3 #define MIN_FW_VERSION 20 #define UNIT_US 21 #define MAX_TIMEOUT_US (UNIT_US * U16_MAX) #define MAX_PACKET 64 enum state { STATE_IRDATA, STATE_COMMAND_NO_RESP, STATE_COMMAND, STATE_TX, }; struct irtoy { struct device *dev; struct usb_device *usbdev; struct rc_dev *rc; struct urb *urb_in, *urb_out; u8 *in; u8 *out; struct completion command_done; bool pulse; enum state state; void *tx_buf; uint tx_len; uint emitted; uint hw_version; uint sw_version; uint proto_version; char phys[64]; }; static void irtoy_response(struct irtoy *irtoy, u32 len) { switch (irtoy->state) { case STATE_COMMAND: if (len == LEN_VERSION && irtoy->in[0] == REPLY_VERSION) { uint version; irtoy->in[LEN_VERSION] = 0; if (kstrtouint(irtoy->in + 1, 10, &version)) { dev_err(irtoy->dev, "invalid version %*phN. Please make sure you are using firmware v20 or higher", LEN_VERSION, irtoy->in); break; } dev_dbg(irtoy->dev, "version %s\n", irtoy->in); irtoy->hw_version = version / 100; irtoy->sw_version = version % 100; irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } else if (len == LEN_SAMPLEMODEPROTO && irtoy->in[0] == REPLY_SAMPLEMODEPROTO) { uint version; irtoy->in[LEN_SAMPLEMODEPROTO] = 0; if (kstrtouint(irtoy->in + 1, 10, &version)) { dev_err(irtoy->dev, "invalid sample mode response %*phN", LEN_SAMPLEMODEPROTO, irtoy->in); return; } dev_dbg(irtoy->dev, "protocol %s\n", irtoy->in); irtoy->proto_version = version; irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } else { dev_err(irtoy->dev, "unexpected response to command: %*phN\n", len, irtoy->in); } break; case STATE_COMMAND_NO_RESP: case STATE_IRDATA: { struct ir_raw_event rawir = { .pulse = irtoy->pulse }; __be16 *in = (__be16 *)irtoy->in; int i; for (i = 0; i < len / sizeof(__be16); i++) { u16 v = be16_to_cpu(in[i]); if (v == 0xffff) { rawir.pulse = false; } else { rawir.duration = v * UNIT_US; ir_raw_event_store_with_timeout(irtoy->rc, &rawir); } rawir.pulse = !rawir.pulse; } irtoy->pulse = rawir.pulse; ir_raw_event_handle(irtoy->rc); break; } case STATE_TX: if (irtoy->tx_len == 0) { if (len == LEN_XMITRES && irtoy->in[0] == REPLY_XMITCOUNT) { u16 emitted = get_unaligned_be16(irtoy->in + 1); dev_dbg(irtoy->dev, "emitted:%u\n", emitted); irtoy->emitted = emitted; } else if (len == 1 && irtoy->in[0] == REPLY_XMITSUCCESS) { irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); } } else { // send next part of tx buffer uint space = irtoy->in[0]; uint buf_len; int err; if (len != 1 || space > MAX_PACKET || space == 0) { dev_dbg(irtoy->dev, "packet length expected: %*phN\n", len, irtoy->in); break; } buf_len = min(space, irtoy->tx_len); dev_dbg(irtoy->dev, "remaining:%u sending:%u\n", irtoy->tx_len, buf_len); memcpy(irtoy->out, irtoy->tx_buf, buf_len); irtoy->urb_out->transfer_buffer_length = buf_len; err = usb_submit_urb(irtoy->urb_out, GFP_ATOMIC); if (err != 0) { dev_err(irtoy->dev, "fail to submit tx buf urb: %d\n", err); irtoy->state = STATE_IRDATA; complete(&irtoy->command_done); break; } irtoy->tx_buf += buf_len; irtoy->tx_len -= buf_len; } break; } } static void irtoy_out_callback(struct urb *urb) { struct irtoy *irtoy = urb->context; if (urb->status == 0) { if (irtoy->state == STATE_COMMAND_NO_RESP) complete(&irtoy->command_done); } else { dev_warn(irtoy->dev, "out urb status: %d\n", urb->status); } } static void irtoy_in_callback(struct urb *urb) { struct irtoy *irtoy = urb->context; int ret; switch (urb->status) { case 0: irtoy_response(irtoy, urb->actual_length); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPROTO: case -EPIPE: usb_unlink_urb(urb); return; default: dev_dbg(irtoy->dev, "in urb status: %d\n", urb->status); } ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret && ret != -ENODEV) dev_warn(irtoy->dev, "failed to resubmit urb: %d\n", ret); } static int irtoy_command(struct irtoy *irtoy, const u8 *cmd, int cmd_len, enum state state) { int err; init_completion(&irtoy->command_done); irtoy->state = state; memcpy(irtoy->out, cmd, cmd_len); irtoy->urb_out->transfer_buffer_length = cmd_len; err = usb_submit_urb(irtoy->urb_out, GFP_KERNEL); if (err != 0) return err; if (!wait_for_completion_timeout(&irtoy->command_done, msecs_to_jiffies(TIMEOUT))) { usb_kill_urb(irtoy->urb_out); return -ETIMEDOUT; } return 0; } static int irtoy_setup(struct irtoy *irtoy) { int err; err = irtoy_command(irtoy, COMMAND_RESET, sizeof(COMMAND_RESET), STATE_COMMAND_NO_RESP); if (err != 0) { dev_err(irtoy->dev, "could not write reset command: %d\n", err); return err; } usleep_range(50, 50); // get version err = irtoy_command(irtoy, COMMAND_VERSION, sizeof(COMMAND_VERSION), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "could not write version command: %d\n", err); return err; } // enter sample mode err = irtoy_command(irtoy, COMMAND_SMODE_ENTER, sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) dev_err(irtoy->dev, "could not write sample command: %d\n", err); return err; } /* * When sending IR, it is imperative that we send the IR data as quickly * as possible to the device, so it does not run out of IR data and * introduce gaps. Allocate the buffer here, and then feed the data from * the urb callback handler. */ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) { struct irtoy *irtoy = rc->priv; unsigned int i, size; __be16 *buf; int err; size = sizeof(u16) * (count + 1); buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; for (i = 0; i < count; i++) { u16 v = DIV_ROUND_CLOSEST(txbuf[i], UNIT_US); if (!v) v = 1; buf[i] = cpu_to_be16(v); } buf[count] = cpu_to_be16(0xffff); irtoy->tx_buf = buf; irtoy->tx_len = size; irtoy->emitted = 0; // There is an issue where if the unit is receiving IR while the // first TXSTART command is sent, the device might end up hanging // with its led on. It does not respond to any command when this // happens. To work around this, re-enter sample mode. err = irtoy_command(irtoy, COMMAND_SMODE_EXIT, sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); kfree(buf); return err; } err = irtoy_command(irtoy, COMMAND_SMODE_ENTER, sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); kfree(buf); return err; } err = irtoy_command(irtoy, COMMAND_TXSTART, sizeof(COMMAND_TXSTART), STATE_TX); kfree(buf); if (err) { dev_err(irtoy->dev, "failed to send tx start command: %d\n", err); // not sure what state the device is in, reset it irtoy_setup(irtoy); return err; } if (size != irtoy->emitted) { dev_err(irtoy->dev, "expected %u emitted, got %u\n", size, irtoy->emitted); // not sure what state the device is in, reset it irtoy_setup(irtoy); return -EINVAL; } return count; } static int irtoy_tx_carrier(struct rc_dev *rc, uint32_t carrier) { struct irtoy *irtoy = rc->priv; u8 buf[3]; int err; if (carrier < 11800) return -EINVAL; buf[0] = 0x06; buf[1] = DIV_ROUND_CLOSEST(48000000, 16 * carrier) - 1; buf[2] = 0; err = irtoy_command(irtoy, buf, sizeof(buf), STATE_COMMAND_NO_RESP); if (err) dev_err(irtoy->dev, "could not write carrier command: %d\n", err); return err; } static int irtoy_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_host_interface *idesc = intf->cur_altsetting; struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep_in = NULL; struct usb_endpoint_descriptor *ep_out = NULL; struct usb_endpoint_descriptor *ep = NULL; struct irtoy *irtoy; struct rc_dev *rc; struct urb *urb; int i, pipe, err = -ENOMEM; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { ep = &idesc->endpoint[i].desc; if (!ep_in && usb_endpoint_is_bulk_in(ep) && usb_endpoint_maxp(ep) == MAX_PACKET) ep_in = ep; if (!ep_out && usb_endpoint_is_bulk_out(ep) && usb_endpoint_maxp(ep) == MAX_PACKET) ep_out = ep; } if (!ep_in || !ep_out) { dev_err(&intf->dev, "required endpoints not found\n"); return -ENODEV; } irtoy = kzalloc(sizeof(*irtoy), GFP_KERNEL); if (!irtoy) return -ENOMEM; irtoy->in = kmalloc(MAX_PACKET, GFP_KERNEL); if (!irtoy->in) goto free_irtoy; irtoy->out = kmalloc(MAX_PACKET, GFP_KERNEL); if (!irtoy->out) goto free_irtoy; rc = rc_allocate_device(RC_DRIVER_IR_RAW); if (!rc) goto free_irtoy; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto free_rcdev; pipe = usb_rcvbulkpipe(usbdev, ep_in->bEndpointAddress); usb_fill_bulk_urb(urb, usbdev, pipe, irtoy->in, MAX_PACKET, irtoy_in_callback, irtoy); irtoy->urb_in = urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto free_rcdev; pipe = usb_sndbulkpipe(usbdev, ep_out->bEndpointAddress); usb_fill_bulk_urb(urb, usbdev, pipe, irtoy->out, MAX_PACKET, irtoy_out_callback, irtoy); irtoy->dev = &intf->dev; irtoy->usbdev = usbdev; irtoy->rc = rc; irtoy->urb_out = urb; irtoy->pulse = true; err = usb_submit_urb(irtoy->urb_in, GFP_KERNEL); if (err != 0) { dev_err(irtoy->dev, "fail to submit in urb: %d\n", err); goto free_rcdev; } err = irtoy_setup(irtoy); if (err) goto free_rcdev; dev_info(irtoy->dev, "version: hardware %u, firmware %u.%u, protocol %u", irtoy->hw_version, irtoy->sw_version / 10, irtoy->sw_version % 10, irtoy->proto_version); if (irtoy->sw_version < MIN_FW_VERSION) { dev_err(irtoy->dev, "need firmware V%02u or higher", MIN_FW_VERSION); err = -ENODEV; goto free_rcdev; } usb_make_path(usbdev, irtoy->phys, sizeof(irtoy->phys)); rc->device_name = "Infrared Toy"; rc->driver_name = KBUILD_MODNAME; rc->input_phys = irtoy->phys; usb_to_input_id(usbdev, &rc->input_id); rc->dev.parent = &intf->dev; rc->priv = irtoy; rc->tx_ir = irtoy_tx; rc->s_tx_carrier = irtoy_tx_carrier; rc->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER; rc->map_name = RC_MAP_RC6_MCE; rc->rx_resolution = UNIT_US; rc->timeout = IR_DEFAULT_TIMEOUT; /* * end of transmission is detected by absence of a usb packet * with more pulse/spaces. However, each usb packet sent can * contain 32 pulse/spaces, which can be quite lengthy, so there * can be a delay between usb packets. For example with nec there is a * 17ms gap between packets. * * So, make timeout a largish minimum which works with most protocols. */ rc->min_timeout = MS_TO_US(40); rc->max_timeout = MAX_TIMEOUT_US; err = rc_register_device(rc); if (err) goto free_rcdev; usb_set_intfdata(intf, irtoy); return 0; free_rcdev: usb_kill_urb(irtoy->urb_out); usb_free_urb(irtoy->urb_out); usb_kill_urb(irtoy->urb_in); usb_free_urb(irtoy->urb_in); rc_free_device(rc); free_irtoy: kfree(irtoy->in); kfree(irtoy->out); kfree(irtoy); return err; } static void irtoy_disconnect(struct usb_interface *intf) { struct irtoy *ir = usb_get_intfdata(intf); rc_unregister_device(ir->rc); usb_set_intfdata(intf, NULL); usb_kill_urb(ir->urb_out); usb_free_urb(ir->urb_out); usb_kill_urb(ir->urb_in); usb_free_urb(ir->urb_in); kfree(ir->in); kfree(ir->out); kfree(ir); } static const struct usb_device_id irtoy_table[] = { { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xfd08, USB_CLASS_CDC_DATA) }, { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xf58b, USB_CLASS_CDC_DATA) }, { } }; static struct usb_driver irtoy_driver = { .name = KBUILD_MODNAME, .probe = irtoy_probe, .disconnect = irtoy_disconnect, .id_table = irtoy_table, }; module_usb_driver(irtoy_driver); MODULE_AUTHOR("Sean Young <sean@mess.org>"); MODULE_DESCRIPTION("Infrared Toy and IR Droid driver"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, irtoy_table);
34 37 36 1 37 37 33 34 37 6 31 30 6 6 34 34 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 // SPDX-License-Identifier: GPL-2.0 /* dvb-usb-dvb.c is part of the DVB USB library. * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * see dvb-usb-init.c for copyright information. * * This file contains functions for initializing and handling the * linux-dvb API. */ #include "dvb-usb-common.h" #include <media/media-device.h> /* does the complete input transfer handling */ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff) { struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv; int newfeedcount, ret; if (adap == NULL) return -ENODEV; if ((adap->active_fe < 0) || (adap->active_fe >= adap->num_frontends_initialized)) { return -EINVAL; } newfeedcount = adap->feedcount + (onoff ? 1 : -1); /* stop feed before setting a new pid if there will be no pid anymore */ if (newfeedcount == 0) { deb_ts("stop feeding\n"); usb_urb_kill(&adap->fe_adap[adap->active_fe].stream); if (adap->props.fe[adap->active_fe].streaming_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].streaming_ctrl(adap, 0); if (ret < 0) { err("error while stopping stream."); return ret; } } } adap->feedcount = newfeedcount; /* activate the pid on the device specific pid_filter */ deb_ts("setting pid (%s): %5d %04x at index %d '%s'\n", adap->fe_adap[adap->active_fe].pid_filtering ? "yes" : "no", dvbdmxfeed->pid, dvbdmxfeed->pid, dvbdmxfeed->index, onoff ? "on" : "off"); if (adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_HAS_PID_FILTER && adap->fe_adap[adap->active_fe].pid_filtering && adap->props.fe[adap->active_fe].pid_filter != NULL) adap->props.fe[adap->active_fe].pid_filter(adap, dvbdmxfeed->index, dvbdmxfeed->pid, onoff); /* start the feed if this was the first feed and there is still a feed * for reception. */ if (adap->feedcount == onoff && adap->feedcount > 0) { deb_ts("controlling pid parser\n"); if (adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_HAS_PID_FILTER && adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF && adap->props.fe[adap->active_fe].pid_filter_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].pid_filter_ctrl(adap, adap->fe_adap[adap->active_fe].pid_filtering); if (ret < 0) { err("could not handle pid_parser"); return ret; } } deb_ts("start feeding\n"); if (adap->props.fe[adap->active_fe].streaming_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].streaming_ctrl(adap, 1); if (ret < 0) { err("error while enabling fifo."); return ret; } } deb_ts("submitting all URBs\n"); usb_urb_submit(&adap->fe_adap[adap->active_fe].stream); } return 0; } static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed) { deb_ts("start pid: 0x%04x, feedtype: %d\n", dvbdmxfeed->pid, dvbdmxfeed->type); return dvb_usb_ctrl_feed(dvbdmxfeed, 1); } static int dvb_usb_stop_feed(struct dvb_demux_feed *dvbdmxfeed) { deb_ts("stop pid: 0x%04x, feedtype: %d\n", dvbdmxfeed->pid, dvbdmxfeed->type); return dvb_usb_ctrl_feed(dvbdmxfeed, 0); } static int dvb_usb_media_device_init(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_device *mdev; struct dvb_usb_device *d = adap->dev; struct usb_device *udev = d->udev; mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return -ENOMEM; media_device_usb_init(mdev, udev, d->desc->name); dvb_register_media_controller(&adap->dvb_adap, mdev); dev_info(&d->udev->dev, "media controller created\n"); #endif return 0; } static int dvb_usb_media_device_register(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB return media_device_register(adap->dvb_adap.mdev); #else return 0; #endif } static void dvb_usb_media_device_unregister(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB if (!adap->dvb_adap.mdev) return; mutex_lock(&adap->dvb_adap.mdev_lock); media_device_unregister(adap->dvb_adap.mdev); media_device_cleanup(adap->dvb_adap.mdev); kfree(adap->dvb_adap.mdev); adap->dvb_adap.mdev = NULL; mutex_unlock(&adap->dvb_adap.mdev_lock); #endif } int dvb_usb_adapter_dvb_init(struct dvb_usb_adapter *adap, short *adapter_nums) { int i; int ret = dvb_register_adapter(&adap->dvb_adap, adap->dev->desc->name, adap->dev->owner, &adap->dev->udev->dev, adapter_nums); if (ret < 0) { deb_info("dvb_register_adapter failed: error %d", ret); goto err; } adap->dvb_adap.priv = adap; ret = dvb_usb_media_device_init(adap); if (ret < 0) { deb_info("dvb_usb_media_device_init failed: error %d", ret); goto err_mc; } if (adap->dev->props.read_mac_address) { if (adap->dev->props.read_mac_address(adap->dev, adap->dvb_adap.proposed_mac) == 0) info("MAC address: %pM", adap->dvb_adap.proposed_mac); else err("MAC address reading failed."); } adap->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; adap->demux.priv = adap; adap->demux.filternum = 0; for (i = 0; i < adap->props.num_frontends; i++) { if (adap->demux.filternum < adap->fe_adap[i].max_feed_count) adap->demux.filternum = adap->fe_adap[i].max_feed_count; } adap->demux.feednum = adap->demux.filternum; adap->demux.start_feed = dvb_usb_start_feed; adap->demux.stop_feed = dvb_usb_stop_feed; adap->demux.write_to_decoder = NULL; if ((ret = dvb_dmx_init(&adap->demux)) < 0) { err("dvb_dmx_init failed: error %d", ret); goto err_dmx; } adap->dmxdev.filternum = adap->demux.filternum; adap->dmxdev.demux = &adap->demux.dmx; adap->dmxdev.capabilities = 0; if ((ret = dvb_dmxdev_init(&adap->dmxdev, &adap->dvb_adap)) < 0) { err("dvb_dmxdev_init failed: error %d", ret); goto err_dmx_dev; } if ((ret = dvb_net_init(&adap->dvb_adap, &adap->dvb_net, &adap->demux.dmx)) < 0) { err("dvb_net_init failed: error %d", ret); goto err_net_init; } adap->state |= DVB_USB_ADAP_STATE_DVB; return 0; err_net_init: dvb_dmxdev_release(&adap->dmxdev); err_dmx_dev: dvb_dmx_release(&adap->demux); err_dmx: dvb_usb_media_device_unregister(adap); err_mc: dvb_unregister_adapter(&adap->dvb_adap); err: return ret; } int dvb_usb_adapter_dvb_exit(struct dvb_usb_adapter *adap) { if (adap->state & DVB_USB_ADAP_STATE_DVB) { deb_info("unregistering DVB part\n"); dvb_net_release(&adap->dvb_net); adap->demux.dmx.close(&adap->demux.dmx); dvb_dmxdev_release(&adap->dmxdev); dvb_dmx_release(&adap->demux); dvb_usb_media_device_unregister(adap); dvb_unregister_adapter(&adap->dvb_adap); adap->state &= ~DVB_USB_ADAP_STATE_DVB; } return 0; } static int dvb_usb_set_active_fe(struct dvb_frontend *fe, int onoff) { struct dvb_usb_adapter *adap = fe->dvb->priv; int ret = (adap->props.frontend_ctrl) ? adap->props.frontend_ctrl(fe, onoff) : 0; if (ret < 0) { err("frontend_ctrl request failed"); return ret; } if (onoff) adap->active_fe = fe->id; return 0; } static int dvb_usb_fe_wakeup(struct dvb_frontend *fe) { struct dvb_usb_adapter *adap = fe->dvb->priv; dvb_usb_device_power_ctrl(adap->dev, 1); dvb_usb_set_active_fe(fe, 1); if (adap->fe_adap[fe->id].fe_init) adap->fe_adap[fe->id].fe_init(fe); return 0; } static int dvb_usb_fe_sleep(struct dvb_frontend *fe) { struct dvb_usb_adapter *adap = fe->dvb->priv; if (adap->fe_adap[fe->id].fe_sleep) adap->fe_adap[fe->id].fe_sleep(fe); dvb_usb_set_active_fe(fe, 0); return dvb_usb_device_power_ctrl(adap->dev, 0); } int dvb_usb_adapter_frontend_init(struct dvb_usb_adapter *adap) { int ret, i; /* register all given adapter frontends */ for (i = 0; i < adap->props.num_frontends; i++) { if (adap->props.fe[i].frontend_attach == NULL) { err("strange: '%s' #%d,%d doesn't want to attach a frontend.", adap->dev->desc->name, adap->id, i); return 0; } ret = adap->props.fe[i].frontend_attach(adap); if (ret || adap->fe_adap[i].fe == NULL) { /* only print error when there is no FE at all */ if (i == 0) err("no frontend was attached by '%s'", adap->dev->desc->name); return 0; } adap->fe_adap[i].fe->id = i; /* re-assign sleep and wakeup functions */ adap->fe_adap[i].fe_init = adap->fe_adap[i].fe->ops.init; adap->fe_adap[i].fe->ops.init = dvb_usb_fe_wakeup; adap->fe_adap[i].fe_sleep = adap->fe_adap[i].fe->ops.sleep; adap->fe_adap[i].fe->ops.sleep = dvb_usb_fe_sleep; if (dvb_register_frontend(&adap->dvb_adap, adap->fe_adap[i].fe)) { err("Frontend %d registration failed.", i); dvb_frontend_detach(adap->fe_adap[i].fe); adap->fe_adap[i].fe = NULL; /* In error case, do not try register more FEs, * still leaving already registered FEs alive. */ if (i == 0) return -ENODEV; else return 0; } /* only attach the tuner if the demod is there */ if (adap->props.fe[i].tuner_attach != NULL) adap->props.fe[i].tuner_attach(adap); adap->num_frontends_initialized++; } ret = dvb_create_media_graph(&adap->dvb_adap, true); if (ret) return ret; ret = dvb_usb_media_device_register(adap); return ret; } int dvb_usb_adapter_frontend_exit(struct dvb_usb_adapter *adap) { int i = adap->num_frontends_initialized - 1; /* unregister all given adapter frontends */ for (; i >= 0; i--) { if (adap->fe_adap[i].fe != NULL) { dvb_unregister_frontend(adap->fe_adap[i].fe); dvb_frontend_detach(adap->fe_adap[i].fe); } } adap->num_frontends_initialized = 0; return 0; }
1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 // SPDX-License-Identifier: GPL-2.0-or-later /* * Quickcam cameras initialization data * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define MODULE_NAME "tv8532" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("TV8532 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ __u8 packet; }; static const struct v4l2_pix_format sif_mode[] = { {176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* TV-8532A (ICM532A) registers (LE) */ #define R00_PART_CONTROL 0x00 #define LATENT_CHANGE 0x80 #define EXPO_CHANGE 0x04 #define R01_TIMING_CONTROL_LOW 0x01 #define CMD_EEprom_Open 0x30 #define CMD_EEprom_Close 0x29 #define R03_TABLE_ADDR 0x03 #define R04_WTRAM_DATA_L 0x04 #define R05_WTRAM_DATA_M 0x05 #define R06_WTRAM_DATA_H 0x06 #define R07_TABLE_LEN 0x07 #define R08_RAM_WRITE_ACTION 0x08 #define R0C_AD_WIDTHL 0x0c #define R0D_AD_WIDTHH 0x0d #define R0E_AD_HEIGHTL 0x0e #define R0F_AD_HEIGHTH 0x0f #define R10_AD_COL_BEGINL 0x10 #define R11_AD_COL_BEGINH 0x11 #define MIRROR 0x04 /* [10] */ #define R14_AD_ROW_BEGINL 0x14 #define R15_AD_ROWBEGINH 0x15 #define R1C_AD_EXPOSE_TIMEL 0x1c #define R20_GAIN_G1L 0x20 #define R21_GAIN_G1H 0x21 #define R22_GAIN_RL 0x22 #define R23_GAIN_RH 0x23 #define R24_GAIN_BL 0x24 #define R25_GAIN_BH 0x25 #define R26_GAIN_G2L 0x26 #define R27_GAIN_G2H 0x27 #define R28_QUANT 0x28 #define R29_LINE 0x29 #define R2C_POLARITY 0x2c #define R2D_POINT 0x2d #define R2E_POINTH 0x2e #define R2F_POINTB 0x2f #define R30_POINTBH 0x30 #define R31_UPD 0x31 #define R2A_HIGH_BUDGET 0x2a #define R2B_LOW_BUDGET 0x2b #define R34_VID 0x34 #define R35_VIDH 0x35 #define R36_PID 0x36 #define R37_PIDH 0x37 #define R39_Test1 0x39 /* GPIO */ #define R3B_Test3 0x3b /* GPIO */ #define R83_AD_IDH 0x83 #define R91_AD_SLOPEREG 0x91 #define R94_AD_BITCONTROL 0x94 static const u8 eeprom_data[][3] = { /* dataH dataM dataL */ {0x01, 0x00, 0x01}, {0x01, 0x80, 0x11}, {0x05, 0x00, 0x14}, {0x05, 0x00, 0x1c}, {0x0d, 0x00, 0x1e}, {0x05, 0x00, 0x1f}, {0x05, 0x05, 0x19}, {0x05, 0x01, 0x1b}, {0x05, 0x09, 0x1e}, {0x0d, 0x89, 0x2e}, {0x05, 0x89, 0x2f}, {0x05, 0x0d, 0xd9}, {0x05, 0x09, 0xf1}, }; /* write 1 byte */ static void reg_w1(struct gspca_dev *gspca_dev, __u16 index, __u8 value) { gspca_dev->usb_buf[0] = value; usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x02, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 1, 500); } /* write 2 bytes */ static void reg_w2(struct gspca_dev *gspca_dev, u16 index, u16 value) { gspca_dev->usb_buf[0] = value; gspca_dev->usb_buf[1] = value >> 8; usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0x02, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 2, 500); } static void tv_8532WriteEEprom(struct gspca_dev *gspca_dev) { int i; reg_w1(gspca_dev, R01_TIMING_CONTROL_LOW, CMD_EEprom_Open); for (i = 0; i < ARRAY_SIZE(eeprom_data); i++) { reg_w1(gspca_dev, R03_TABLE_ADDR, i); reg_w1(gspca_dev, R04_WTRAM_DATA_L, eeprom_data[i][2]); reg_w1(gspca_dev, R05_WTRAM_DATA_M, eeprom_data[i][1]); reg_w1(gspca_dev, R06_WTRAM_DATA_H, eeprom_data[i][0]); reg_w1(gspca_dev, R08_RAM_WRITE_ACTION, 0); } reg_w1(gspca_dev, R07_TABLE_LEN, i); reg_w1(gspca_dev, R01_TIMING_CONTROL_LOW, CMD_EEprom_Close); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam; cam = &gspca_dev->cam; cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); return 0; } static void tv_8532_setReg(struct gspca_dev *gspca_dev) { reg_w1(gspca_dev, R3B_Test3, 0x0a); /* Test0Sel = 10 */ /******************************************************/ reg_w1(gspca_dev, R0E_AD_HEIGHTL, 0x90); reg_w1(gspca_dev, R0F_AD_HEIGHTH, 0x01); reg_w2(gspca_dev, R1C_AD_EXPOSE_TIMEL, 0x018f); reg_w1(gspca_dev, R10_AD_COL_BEGINL, 0x44); /* begin active line */ reg_w1(gspca_dev, R11_AD_COL_BEGINH, 0x00); /* mirror and digital gain */ reg_w1(gspca_dev, R14_AD_ROW_BEGINL, 0x0a); reg_w1(gspca_dev, R94_AD_BITCONTROL, 0x02); reg_w1(gspca_dev, R91_AD_SLOPEREG, 0x00); reg_w1(gspca_dev, R00_PART_CONTROL, LATENT_CHANGE | EXPO_CHANGE); /* = 0x84 */ } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { tv_8532WriteEEprom(gspca_dev); return 0; } static void setexposure(struct gspca_dev *gspca_dev, s32 val) { reg_w2(gspca_dev, R1C_AD_EXPOSE_TIMEL, val); reg_w1(gspca_dev, R00_PART_CONTROL, LATENT_CHANGE | EXPO_CHANGE); /* 0x84 */ } static void setgain(struct gspca_dev *gspca_dev, s32 val) { reg_w2(gspca_dev, R20_GAIN_G1L, val); reg_w2(gspca_dev, R22_GAIN_RL, val); reg_w2(gspca_dev, R24_GAIN_BL, val); reg_w2(gspca_dev, R26_GAIN_G2L, val); } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; reg_w1(gspca_dev, R0C_AD_WIDTHL, 0xe8); /* 0x20; 0x0c */ reg_w1(gspca_dev, R0D_AD_WIDTHH, 0x03); /************************************************/ reg_w1(gspca_dev, R28_QUANT, 0x90); /* 0x72 compressed mode 0x28 */ if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) { /* 176x144 */ reg_w1(gspca_dev, R29_LINE, 0x41); /* CIF - 2 lines/packet */ } else { /* 352x288 */ reg_w1(gspca_dev, R29_LINE, 0x81); /* CIF - 2 lines/packet */ } /************************************************/ reg_w1(gspca_dev, R2C_POLARITY, 0x10); /* slow clock */ reg_w1(gspca_dev, R2D_POINT, 0x14); reg_w1(gspca_dev, R2E_POINTH, 0x01); reg_w1(gspca_dev, R2F_POINTB, 0x12); reg_w1(gspca_dev, R30_POINTBH, 0x01); tv_8532_setReg(gspca_dev); /************************************************/ reg_w1(gspca_dev, R31_UPD, 0x01); /* update registers */ msleep(200); reg_w1(gspca_dev, R31_UPD, 0x00); /* end update */ gspca_dev->empty_packet = 0; /* check the empty packets */ sd->packet = 0; /* ignore the first packets */ return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { reg_w1(gspca_dev, R3B_Test3, 0x0b); /* Test0Sel = 11 = GPIO */ } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; int packet_type0, packet_type1; packet_type0 = packet_type1 = INTER_PACKET; if (gspca_dev->empty_packet) { gspca_dev->empty_packet = 0; sd->packet = gspca_dev->pixfmt.height / 2; packet_type0 = FIRST_PACKET; } else if (sd->packet == 0) return; /* 2 more lines in 352x288 ! */ sd->packet--; if (sd->packet == 0) packet_type1 = LAST_PACKET; /* each packet contains: * - header 2 bytes * - RGRG line * - 4 bytes * - GBGB line * - 4 bytes */ gspca_frame_add(gspca_dev, packet_type0, data + 2, gspca_dev->pixfmt.width); gspca_frame_add(gspca_dev, packet_type1, data + gspca_dev->pixfmt.width + 5, gspca_dev->pixfmt.width); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_EXPOSURE: setexposure(gspca_dev, ctrl->val); break; case V4L2_CID_GAIN: setgain(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_EXPOSURE, 0, 0x18f, 1, 0x18f); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 0x7ff, 1, 0x100); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x046d, 0x0920)}, {USB_DEVICE(0x046d, 0x0921)}, {USB_DEVICE(0x0545, 0x808b)}, {USB_DEVICE(0x0545, 0x8333)}, {USB_DEVICE(0x0923, 0x010f)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
115 1 1 112 2 139 139 39 2 278 33 93 354 290 1 6 2 1 1 89 8 4 114 274 70 17 10 2 1 2 21 145 141 4 2 6 115 6 1 3 13 128 104 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ #include <linux/bpf.h> #include "disasm.h" #define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) static const char * const func_id_str[] = { __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) }; #undef __BPF_FUNC_STR_FN static const char *__func_get_name(const struct bpf_insn_cbs *cbs, const struct bpf_insn *insn, char *buff, size_t len) { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); if (!insn->src_reg && insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && func_id_str[insn->imm]) return func_id_str[insn->imm]; if (cbs && cbs->cb_call) { const char *res; res = cbs->cb_call(cbs->private_data, insn); if (res) return res; } if (insn->src_reg == BPF_PSEUDO_CALL) snprintf(buff, len, "%+d", insn->imm); else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) snprintf(buff, len, "kernel-function"); return buff; } static const char *__func_imm_name(const struct bpf_insn_cbs *cbs, const struct bpf_insn *insn, u64 full_imm, char *buff, size_t len) { if (cbs && cbs->cb_imm) return cbs->cb_imm(cbs->private_data, insn, full_imm); snprintf(buff, len, "0x%llx", (unsigned long long)full_imm); return buff; } const char *func_id_name(int id) { if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) return func_id_str[id]; else return "unknown"; } const char *const bpf_class_string[8] = { [BPF_LD] = "ld", [BPF_LDX] = "ldx", [BPF_ST] = "st", [BPF_STX] = "stx", [BPF_ALU] = "alu", [BPF_JMP] = "jmp", [BPF_JMP32] = "jmp32", [BPF_ALU64] = "alu64", }; const char *const bpf_alu_string[16] = { [BPF_ADD >> 4] = "+=", [BPF_SUB >> 4] = "-=", [BPF_MUL >> 4] = "*=", [BPF_DIV >> 4] = "/=", [BPF_OR >> 4] = "|=", [BPF_AND >> 4] = "&=", [BPF_LSH >> 4] = "<<=", [BPF_RSH >> 4] = ">>=", [BPF_NEG >> 4] = "neg", [BPF_MOD >> 4] = "%=", [BPF_XOR >> 4] = "^=", [BPF_MOV >> 4] = "=", [BPF_ARSH >> 4] = "s>>=", [BPF_END >> 4] = "endian", }; static const char *const bpf_alu_sign_string[16] = { [BPF_DIV >> 4] = "s/=", [BPF_MOD >> 4] = "s%=", }; static const char *const bpf_movsx_string[4] = { [0] = "(s8)", [1] = "(s16)", [3] = "(s32)", }; static const char *const bpf_atomic_alu_string[16] = { [BPF_ADD >> 4] = "add", [BPF_AND >> 4] = "and", [BPF_OR >> 4] = "or", [BPF_XOR >> 4] = "xor", }; static const char *const bpf_ldst_string[] = { [BPF_W >> 3] = "u32", [BPF_H >> 3] = "u16", [BPF_B >> 3] = "u8", [BPF_DW >> 3] = "u64", }; static const char *const bpf_ldsx_string[] = { [BPF_W >> 3] = "s32", [BPF_H >> 3] = "s16", [BPF_B >> 3] = "s8", }; static const char *const bpf_jmp_string[16] = { [BPF_JA >> 4] = "jmp", [BPF_JEQ >> 4] = "==", [BPF_JGT >> 4] = ">", [BPF_JLT >> 4] = "<", [BPF_JGE >> 4] = ">=", [BPF_JLE >> 4] = "<=", [BPF_JSET >> 4] = "&", [BPF_JNE >> 4] = "!=", [BPF_JSGT >> 4] = "s>", [BPF_JSLT >> 4] = "s<", [BPF_JSGE >> 4] = "s>=", [BPF_JSLE >> 4] = "s<=", [BPF_CALL >> 4] = "call", [BPF_EXIT >> 4] = "exit", }; static void print_bpf_end_insn(bpf_insn_print_t verbose, void *private_data, const struct bpf_insn *insn) { verbose(private_data, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", insn->imm, insn->dst_reg); } static void print_bpf_bswap_insn(bpf_insn_print_t verbose, void *private_data, const struct bpf_insn *insn) { verbose(private_data, "(%02x) r%d = bswap%d r%d\n", insn->code, insn->dst_reg, insn->imm, insn->dst_reg); } static bool is_sdiv_smod(const struct bpf_insn *insn) { return (BPF_OP(insn->code) == BPF_DIV || BPF_OP(insn->code) == BPF_MOD) && insn->off == 1; } static bool is_movsx(const struct bpf_insn *insn) { return BPF_OP(insn->code) == BPF_MOV && (insn->off == 8 || insn->off == 16 || insn->off == 32); } static bool is_addr_space_cast(const struct bpf_insn *insn) { return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_SPACE_CAST; } /* Special (internal-only) form of mov, used to resolve per-CPU addrs: * dst_reg = src_reg + <percpu_base_off> * BPF_ADDR_PERCPU is used as a special insn->off value. */ #define BPF_ADDR_PERCPU (-1) static inline bool is_mov_percpu_addr(const struct bpf_insn *insn) { return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU; } void print_bpf_insn(const struct bpf_insn_cbs *cbs, const struct bpf_insn *insn, bool allow_ptr_leaks) { const bpf_insn_print_t verbose = cbs->cb_print; u8 class = BPF_CLASS(insn->code); if (class == BPF_ALU || class == BPF_ALU64) { if (BPF_OP(insn->code) == BPF_END) { if (class == BPF_ALU64) print_bpf_bswap_insn(verbose, cbs->private_data, insn); else print_bpf_end_insn(verbose, cbs->private_data, insn); } else if (BPF_OP(insn->code) == BPF_NEG) { verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n", insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, class == BPF_ALU ? 'w' : 'r', insn->dst_reg); } else if (is_addr_space_cast(insn)) { verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n", insn->code, insn->dst_reg, insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm); } else if (is_mov_percpu_addr(insn)) { verbose(cbs->private_data, "(%02x) r%d = &(void __percpu *)(r%d)\n", insn->code, insn->dst_reg, insn->src_reg); } else if (BPF_SRC(insn->code) == BPF_X) { verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n", insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, is_sdiv_smod(insn) ? bpf_alu_sign_string[BPF_OP(insn->code) >> 4] : bpf_alu_string[BPF_OP(insn->code) >> 4], is_movsx(insn) ? bpf_movsx_string[(insn->off >> 3) - 1] : "", class == BPF_ALU ? 'w' : 'r', insn->src_reg); } else { verbose(cbs->private_data, "(%02x) %c%d %s %d\n", insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, is_sdiv_smod(insn) ? bpf_alu_sign_string[BPF_OP(insn->code) >> 4] : bpf_alu_string[BPF_OP(insn->code) >> 4], insn->imm); } } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_MEM) verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); else if (BPF_MODE(insn->code) == BPF_ATOMIC && (insn->imm == BPF_ADD || insn->imm == BPF_AND || insn->imm == BPF_OR || insn->imm == BPF_XOR)) { verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) %s r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, bpf_alu_string[BPF_OP(insn->imm) >> 4], insn->src_reg); } else if (BPF_MODE(insn->code) == BPF_ATOMIC && (insn->imm == (BPF_ADD | BPF_FETCH) || insn->imm == (BPF_AND | BPF_FETCH) || insn->imm == (BPF_OR | BP