Total coverage: 220733 (12%)of 1897811
8 3 8 8 1 1 3 1 1 1 1 8 8 8 8 1 1 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 // SPDX-License-Identifier: GPL-2.0-or-later /* * SN9C2028 library * * Copyright (C) 2009 Theodore Kilgore <kilgota@auburn.edu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sn9c2028" #include "gspca.h" MODULE_AUTHOR("Theodore Kilgore"); MODULE_DESCRIPTION("Sonix SN9C2028 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 sof_read; u16 model; #define MIN_AVG_LUM 8500 #define MAX_AVG_LUM 10000 int avg_lum; u8 avg_lum_l; struct { /* autogain and gain control cluster */ struct v4l2_ctrl *autogain; struct v4l2_ctrl *gain; }; }; struct init_command { unsigned char instruction[6]; unsigned char to_read; /* length to read. 0 means no reply requested */ }; /* How to change the resolution of any of the VGA cams is unknown */ static const struct v4l2_pix_format vga_mode[] = { {640, 480, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 4, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* No way to change the resolution of the CIF cams is known */ static const struct v4l2_pix_format cif_mode[] = { {352, 288, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 4, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* the bytes to write are in gspca_dev->usb_buf */ static int sn9c2028_command(struct gspca_dev *gspca_dev, u8 *command) { int rc; gspca_dbg(gspca_dev, D_USBO, "sending command %02x%02x%02x%02x%02x%02x\n", command[0], command[1], command[2], command[3], command[4], command[5]); memcpy(gspca_dev->usb_buf, command, 6); rc = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_CONFIGURATION, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 2, 0, gspca_dev->usb_buf, 6, 500); if (rc < 0) { pr_err("command write [%02x] error %d\n", gspca_dev->usb_buf[0], rc); return rc; } return 0; } static int sn9c2028_read1(struct gspca_dev *gspca_dev) { int rc; rc = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, 0, gspca_dev->usb_buf, 1, 500); if (rc != 1) { pr_err("read1 error %d\n", rc); return (rc < 0) ? rc : -EIO; } gspca_dbg(gspca_dev, D_USBI, "read1 response %02x\n", gspca_dev->usb_buf[0]); return gspca_dev->usb_buf[0]; } static int sn9c2028_read4(struct gspca_dev *gspca_dev, u8 *reading) { int rc; rc = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 4, 0, gspca_dev->usb_buf, 4, 500); if (rc != 4) { pr_err("read4 error %d\n", rc); return (rc < 0) ? rc : -EIO; } memcpy(reading, gspca_dev->usb_buf, 4); gspca_dbg(gspca_dev, D_USBI, "read4 response %02x%02x%02x%02x\n", reading[0], reading[1], reading[2], reading[3]); return rc; } static int sn9c2028_long_command(struct gspca_dev *gspca_dev, u8 *command) { int i, status; __u8 reading[4]; status = sn9c2028_command(gspca_dev, command); if (status < 0) return status; status = -1; for (i = 0; i < 256 && status < 2; i++) status = sn9c2028_read1(gspca_dev); if (status < 0) { pr_err("long command status read error %d\n", status); return status; } memset(reading, 0, 4); status = sn9c2028_read4(gspca_dev, reading); if (status < 0) return status; /* in general, the first byte of the response is the first byte of * the command, or'ed with 8 */ status = sn9c2028_read1(gspca_dev); if (status < 0) return status; return 0; } static int sn9c2028_short_command(struct gspca_dev *gspca_dev, u8 *command) { int err_code; err_code = sn9c2028_command(gspca_dev, command); if (err_code < 0) return err_code; err_code = sn9c2028_read1(gspca_dev); if (err_code < 0) return err_code; return 0; } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam = &gspca_dev->cam; gspca_dbg(gspca_dev, D_PROBE, "SN9C2028 camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); sd->model = id->idProduct; switch (sd->model) { case 0x7005: gspca_dbg(gspca_dev, D_PROBE, "Genius Smart 300 camera\n"); break; case 0x7003: gspca_dbg(gspca_dev, D_PROBE, "Genius Videocam Live v2\n"); break; case 0x8000: gspca_dbg(gspca_dev, D_PROBE, "DC31VC\n"); break; case 0x8001: gspca_dbg(gspca_dev, D_PROBE, "Spy camera\n"); break; case 0x8003: gspca_dbg(gspca_dev, D_PROBE, "CIF camera\n"); break; case 0x8008: gspca_dbg(gspca_dev, D_PROBE, "Mini-Shotz ms-350 camera\n"); break; case 0x800a: gspca_dbg(gspca_dev, D_PROBE, "Vivitar 3350b type camera\n"); cam->input_flags = V4L2_IN_ST_VFLIP | V4L2_IN_ST_HFLIP; break; } switch (sd->model) { case 0x8000: case 0x8001: case 0x8003: cam->cam_mode = cif_mode; cam->nmodes = ARRAY_SIZE(cif_mode); break; default: cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); } return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { int status; sn9c2028_read1(gspca_dev); sn9c2028_read1(gspca_dev); status = sn9c2028_read1(gspca_dev); return (status < 0) ? status : 0; } static int run_start_commands(struct gspca_dev *gspca_dev, struct init_command *cam_commands, int n) { int i, err_code = -1; for (i = 0; i < n; i++) { switch (cam_commands[i].to_read) { case 4: err_code = sn9c2028_long_command(gspca_dev, cam_commands[i].instruction); break; case 1: err_code = sn9c2028_short_command(gspca_dev, cam_commands[i].instruction); break; case 0: err_code = sn9c2028_command(gspca_dev, cam_commands[i].instruction); break; } if (err_code < 0) return err_code; } return 0; } static void set_gain(struct gspca_dev *gspca_dev, s32 g) { struct sd *sd = (struct sd *) gspca_dev; struct init_command genius_vcam_live_gain_cmds[] = { {{0x1d, 0x25, 0x10 /* This byte is gain */, 0x20, 0xab, 0x00}, 0}, }; if (!gspca_dev->streaming) return; switch (sd->model) { case 0x7003: genius_vcam_live_gain_cmds[0].instruction[2] = g; run_start_commands(gspca_dev, genius_vcam_live_gain_cmds, ARRAY_SIZE(genius_vcam_live_gain_cmds)); break; default: break; } } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { /* standalone gain control */ case V4L2_CID_GAIN: set_gain(gspca_dev, ctrl->val); break; /* autogain */ case V4L2_CID_AUTOGAIN: set_gain(gspca_dev, sd->gain->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; struct sd *sd = (struct sd *)gspca_dev; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); switch (sd->model) { case 0x7003: sd->gain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 20, 1, 0); sd->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); break; default: break; } return 0; } static int start_spy_cam(struct gspca_dev *gspca_dev) { struct init_command spy_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, /* width 352 */ {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, /* height 288 */ /* {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, */ {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* red gain ?*/ /* {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, */ {{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x13, 0x29, 0x01, 0x0c, 0x00, 0x00}, 4}, */ {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, /* {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, */ {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x06, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x03, 0x13, 0x00, 0x00, 0x00}, 4}, /*don't mess with*/ /*{{0x11, 0x04, 0x06, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x04, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */ /*{{0x11, 0x05, 0x65, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x05, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */ {{0x11, 0x06, 0xb1, 0x00, 0x00, 0x00}, 4}, /* observed */ {{0x11, 0x07, 0x00, 0x00, 0x00, 0x00}, 4}, /*{{0x11, 0x08, 0x06, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x08, 0x0b, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x09, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0c, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0d, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0e, 0x04, 0x00, 0x00, 0x00}, 4}, /* {{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4}, */ /* brightness or gain. 0 is default. 4 is good * indoors at night with incandescent lighting */ {{0x11, 0x0f, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x06, 0x00, 0x00, 0x00}, 4}, /*hstart or hoffs*/ {{0x11, 0x11, 0x06, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, observed */ {{0x1b, 0x02, 0x11, 0x00, 0x00, 0x00}, 1}, /* brighter */ /* {{0x1b, 0x13, 0x01, 0x00, 0x00, 0x00}, 1}, observed */ {{0x1b, 0x13, 0x11, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1}, /* compresses */ /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, spy_start_commands, ARRAY_SIZE(spy_start_commands)); } static int start_cif_cam(struct gspca_dev *gspca_dev) { struct init_command cif_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, /* The entire sequence below seems redundant */ /* {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x06, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, width? {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, height? {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample? {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},*/ {{0x1b, 0x21, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x17, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x19, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x03, 0x5a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x04, 0x27, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x05, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x14, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x14, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x15, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x16, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x77, 0xa2, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x06, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x07, 0x14, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x08, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x09, 0x10, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0e, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x07, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x1f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 1}, /* width/8 */ {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 1}, /* height/8 */ /* {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample? * {{0x13, 0x28, 0x01, 0x1e, 0x00, 0x00}, 4}, does nothing * {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, */ /* {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, * causes subsampling * but not a change in the resolution setting! */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x08, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x06, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x04, 0x6d, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x05, 0x03, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0e, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1},/* use compression */ /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, cif_start_commands, ARRAY_SIZE(cif_start_commands)); } static int start_ms350_cam(struct gspca_dev *gspca_dev) { struct init_command ms350_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x00, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x01, 0x70, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x05, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x03, 0x5d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x04, 0x07, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x05, 0x25, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x06, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x07, 0x09, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x08, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x09, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0a, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0c, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0d, 0x0c, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0e, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x63, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x70, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x18, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* width */ {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* height */ {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* vstart? */ {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x40, 0x00, 0x00}, 4}, /* hstart? */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x1b, 0x02, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x18, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x0a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 0}, /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, ms350_start_commands, ARRAY_SIZE(ms350_start_commands)); } static int start_genius_cam(struct gspca_dev *gspca_dev) { struct init_command genius_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x10, 0x00, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, /* "preliminary" width and height settings */ {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x25, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x26, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x27, 0x88, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x30, 0x38, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x31, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x32, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x33, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x34, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x5b, 0x0a, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* real width */ {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* real height */ {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x62, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x28, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0xe0, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x80, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 0} /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, genius_start_commands, ARRAY_SIZE(genius_start_commands)); } static int start_genius_videocam_live(struct gspca_dev *gspca_dev) { int r; struct sd *sd = (struct sd *) gspca_dev; struct init_command genius_vcam_live_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 0}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x10, 0x00, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2d, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x01, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x92, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x25, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x26, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x27, 0x88, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x30, 0x38, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x31, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x32, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x33, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x34, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x5b, 0x0a, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x62, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x28, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0xe0, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x80, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 0}, /* Camera should start to capture now. */ {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 0}, {{0x1b, 0x32, 0x26, 0x00, 0x00, 0x00}, 0}, {{0x1d, 0x25, 0x10, 0x20, 0xab, 0x00}, 0}, }; r = run_start_commands(gspca_dev, genius_vcam_live_start_commands, ARRAY_SIZE(genius_vcam_live_start_commands)); if (r < 0) return r; if (sd->gain) set_gain(gspca_dev, v4l2_ctrl_g_ctrl(sd->gain)); return r; } static int start_vivitar_cam(struct gspca_dev *gspca_dev) { struct init_command vivitar_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0a, 0x00, 0x00}, 4}, /* * Above is changed from OEM 0x0b. Fixes Bayer tiling. * Presumably gives a vertical shift of one row. */ {{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4}, /* Above seems to do horizontal shift. */ {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, /* Above three commands seem to relate to brightness. */ {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x12, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x01, 0x77, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x3a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x78, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x14, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x15, 0x34, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x1b, 0x04, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x20, 0x44, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x23, 0xee, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x26, 0xa0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x27, 0x9a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x28, 0xa0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x29, 0x30, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2a, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2b, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2f, 0x3d, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x30, 0x24, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x32, 0x86, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x60, 0xa9, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x61, 0x42, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x65, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x69, 0x38, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x6f, 0x88, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x70, 0x0b, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x71, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x74, 0x21, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x75, 0x86, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x76, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x7d, 0xf3, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x17, 0x1c, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x18, 0xc0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x19, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x1a, 0xf6, 0x00, 0x00, 0x00}, 1}, /* {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0b, 0x00, 0x00}, 4}, */ {{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x26, 0x00, 0x00, 0x00}, 1}, {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x76, 0x03, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x00, 0x3f, 0x00, 0x00, 0x00}, 1}, /* Above is brightness; OEM driver setting is 0x10 */ {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x20, 0x29, 0x30, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1} }; return run_start_commands(gspca_dev, vivitar_start_commands, ARRAY_SIZE(vivitar_start_commands)); } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int err_code; sd->sof_read = 0; switch (sd->model) { case 0x7005: err_code = start_genius_cam(gspca_dev); break; case 0x7003: err_code = start_genius_videocam_live(gspca_dev); break; case 0x8001: err_code = start_spy_cam(gspca_dev); break; case 0x8003: err_code = start_cif_cam(gspca_dev); break; case 0x8008: err_code = start_ms350_cam(gspca_dev); break; case 0x800a: err_code = start_vivitar_cam(gspca_dev); break; default: pr_err("Starting unknown camera, please report this\n"); return -ENXIO; } sd->avg_lum = -1; return err_code; } static void sd_stopN(struct gspca_dev *gspca_dev) { int result; __u8 data[6]; result = sn9c2028_read1(gspca_dev); if (result < 0) gspca_err(gspca_dev, "Camera Stop read failed\n"); memset(data, 0, 6); data[0] = 0x14; result = sn9c2028_command(gspca_dev, data); if (result < 0) gspca_err(gspca_dev, "Camera Stop command failed\n"); } static void do_autogain(struct gspca_dev *gspca_dev, int avg_lum) { struct sd *sd = (struct sd *) gspca_dev; s32 cur_gain = v4l2_ctrl_g_ctrl(sd->gain); if (avg_lum == -1) return; if (avg_lum < MIN_AVG_LUM) { if (cur_gain == sd->gain->maximum) return; cur_gain++; v4l2_ctrl_s_ctrl(sd->gain, cur_gain); } if (avg_lum > MAX_AVG_LUM) { if (cur_gain == sd->gain->minimum) return; cur_gain--; v4l2_ctrl_s_ctrl(sd->gain, cur_gain); } } static void sd_dqcallback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->autogain == NULL || !v4l2_ctrl_g_ctrl(sd->autogain)) return; do_autogain(gspca_dev, sd->avg_lum); } /* Include sn9c2028 sof detection functions */ #include "sn9c2028.h" static void sd_pkt_scan(struct gspca_dev *gspca_dev, __u8 *data, /* isoc packet */ int len) /* iso packet length */ { unsigned char *sof; sof = sn9c2028_find_sof(gspca_dev, data, len); if (sof) { int n; /* finish decoding current frame */ n = sof - data; if (n > sizeof sn9c2028_sof_marker) n -= sizeof sn9c2028_sof_marker; else n = 0; gspca_frame_add(gspca_dev, LAST_PACKET, data, n); /* Start next frame. */ gspca_frame_add(gspca_dev, FIRST_PACKET, sn9c2028_sof_marker, sizeof sn9c2028_sof_marker); len -= sof - data; data = sof; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .dq_callback = sd_dqcallback, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0458, 0x7005)}, /* Genius Smart 300, version 2 */ {USB_DEVICE(0x0458, 0x7003)}, /* Genius Videocam Live v2 */ /* The Genius Smart is untested. I can't find an owner ! */ /* {USB_DEVICE(0x0c45, 0x8000)}, DC31VC, Don't know this camera */ {USB_DEVICE(0x0c45, 0x8001)}, /* Wild Planet digital spy cam */ {USB_DEVICE(0x0c45, 0x8003)}, /* Several small CIF cameras */ /* {USB_DEVICE(0x0c45, 0x8006)}, Unknown VGA camera */ {USB_DEVICE(0x0c45, 0x8008)}, /* Mini-Shotz ms-350 */ {USB_DEVICE(0x0c45, 0x800a)}, /* Vivicam 3350B */ {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
17 5 5 5 4 4 4 3 1 4 186 1 188 1 183 2 192 40 188 191 183 248 190 62 66 2 3 1 3 56 12 11 54 15 44 231 1 2 2 4 1 4 5 1 5 16 11 1 4 13 1 12 12 6 1 1 5 5 4 2 2 1 1 5 1 1 1 2 1 2 1 1 1 2 3 3 3 1 1 1 6 17 19 3 2 34 48 34 1 16 33 4 2 2 1 1 2 2 1 147 98 5 1 8 1 1 4 2 1 1 1 1 1 1 2 3 1 1 1 1 2 25 3 1 1 27 23 2 6 5 7 1 1 212 20 22 185 81 2 186 203 2 3 1 2 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/ptrace.c * * (C) Copyright 1999 Linus Torvalds * * Common interfaces for "ptrace()" which we do not want * to continually duplicate across every architecture. */ #include <linux/capability.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> #include <linux/uio.h> #include <linux/audit.h> #include <linux/pid_namespace.h> #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/regset.h> #include <linux/hw_breakpoint.h> #include <linux/cn_proc.h> #include <linux/compat.h> #include <linux/sched/signal.h> #include <linux/minmax.h> #include <linux/syscall_user_dispatch.h> #include <asm/syscall.h> /* for syscall_get_* */ /* * Access another process' address space via ptrace. * Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; mm = get_task_mm(tsk); if (!mm) return 0; if (!tsk->ptrace || (current != tsk->parent) || ((get_dumpable(mm) != SUID_DUMP_USER) && !ptracer_capable(tsk, mm->user_ns))) { mmput(mm); return 0; } ret = access_remote_vm(mm, addr, buf, len, gup_flags); mmput(mm); return ret; } void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, const struct cred *ptracer_cred) { BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; child->ptracer_cred = get_cred(ptracer_cred); } /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { __ptrace_link(child, new_parent, current_cred()); } /** * __ptrace_unlink - unlink ptracee and restore its execution state * @child: ptracee to be unlinked * * Remove @child from the ptrace list, move it back to the original parent, * and restore the execution state so that it conforms to the group stop * state. * * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer * exiting. For PTRACE_DETACH, unless the ptracee has been killed between * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED. * If the ptracer is exiting, the ptracee can be in any state. * * After detach, the ptracee should be in a state which conforms to the * group stop. If the group is stopped or in the process of stopping, the * ptracee should be put into TASK_STOPPED; otherwise, it should be woken * up from TASK_TRACED. * * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED, * it goes through TRACED -> RUNNING -> STOPPED transition which is similar * to but in the opposite direction of what happens while attaching to a * stopped task. However, in this direction, the intermediate RUNNING * state is not hidden even from the current ptracer and if it immediately * re-attaches and performs a WNOHANG wait(2), it may fail. * * CONTEXT: * write_lock_irq(tasklist_lock) */ void __ptrace_unlink(struct task_struct *child) { const struct cred *old_cred; BUG_ON(!child->ptrace); clear_task_syscall_work(child, SYSCALL_TRACE); #if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU) clear_task_syscall_work(child, SYSCALL_EMU); #endif child->parent = child->real_parent; list_del_init(&child->ptrace_entry); old_cred = child->ptracer_cred; child->ptracer_cred = NULL; put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; /* * Clear all pending traps and TRAPPING. TRAPPING should be * cleared regardless of JOBCTL_STOP_PENDING. Do it explicitly. */ task_clear_jobctl_pending(child, JOBCTL_TRAP_MASK); task_clear_jobctl_trapping(child); /* * Reinstate JOBCTL_STOP_PENDING if group stop is in effect and * @child isn't dead. */ if (!(child->flags & PF_EXITING) && (child->signal->flags & SIGNAL_STOP_STOPPED || child->signal->group_stop_count)) child->jobctl |= JOBCTL_STOP_PENDING; /* * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick * @child in the butt. Note that @resume should be used iff @child * is in TASK_TRACED; otherwise, we might unduly disrupt * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } static bool looks_like_a_spurious_pid(struct task_struct *task) { if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP)) return false; if (task_pid_vnr(task) == task->ptrace_message) return false; /* * The tracee changed its pid but the PTRACE_EVENT_EXEC event * was not wait()'ed, most probably debugger targets the old * leader which was destroyed in de_thread(). */ return true; } /* * Ensure that nothing can wake it up, even SIGKILL * * A task is switched to this state while a ptrace operation is in progress; * such that the ptrace operation is uninterruptible. */ static bool ptrace_freeze_traced(struct task_struct *task) { bool ret = false; /* Lockless, nobody but us can set this flag */ if (task->jobctl & JOBCTL_LISTENING) return ret; spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { task->jobctl |= JOBCTL_PTRACE_FROZEN; ret = true; } spin_unlock_irq(&task->sighand->siglock); return ret; } static void ptrace_unfreeze_traced(struct task_struct *task) { unsigned long flags; /* * The child may be awake and may have cleared * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. */ if (lock_task_sighand(task, &flags)) { task->jobctl &= ~JOBCTL_PTRACE_FROZEN; if (__fatal_signal_pending(task)) { task->jobctl &= ~JOBCTL_TRACED; wake_up_state(task, __TASK_TRACED); } unlock_task_sighand(task, &flags); } } /** * ptrace_check_attach - check whether ptracee is ready for ptrace operation * @child: ptracee to check for * @ignore_state: don't check whether @child is currently %TASK_TRACED * * Check whether @child is being ptraced by %current and ready for further * ptrace operations. If @ignore_state is %false, @child also should be in * %TASK_TRACED state and on return the child is guaranteed to be traced * and not executing. If @ignore_state is %true, @child can be in any * state. * * CONTEXT: * Grabs and releases tasklist_lock and @child->sighand->siglock. * * RETURNS: * 0 on success, -ESRCH if %child is not ready. */ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) { int ret = -ESRCH; /* * We take the read lock around doing both checks to close a * possible race where someone else was tracing our child and * detached between these two checks. After this locked check, * we are sure that this is our traced child and that can only * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ if (ignore_state || ptrace_freeze_traced(child)) ret = 0; } read_unlock(&tasklist_lock); if (!ret && !ignore_state && WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED|TASK_FROZEN))) ret = -ESRCH; return ret; } static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { if (mode & PTRACE_MODE_NOAUDIT) return ns_capable_noaudit(ns, CAP_SYS_PTRACE); return ns_capable(ns, CAP_SYS_PTRACE); } /* Returns 0 on success, -errno on denial. */ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) { WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n"); return -EPERM; } /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. * * ptrace_attach denies several cases that /proc allows * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ /* Don't let security modules deny introspection */ if (same_thread_group(task, current)) return 0; rcu_read_lock(); if (mode & PTRACE_MODE_FSCREDS) { caller_uid = cred->fsuid; caller_gid = cred->fsgid; } else { /* * Using the euid would make more sense here, but something * in userland might rely on the old behavior, and this * shouldn't be a security problem since * PTRACE_MODE_REALCREDS implies that the caller explicitly * used a syscall that requests access to another process * (and not a filesystem syscall to procfs). */ caller_uid = cred->uid; caller_gid = cred->gid; } tcred = __task_cred(task); if (uid_eq(caller_uid, tcred->euid) && uid_eq(caller_uid, tcred->suid) && uid_eq(caller_uid, tcred->uid) && gid_eq(caller_gid, tcred->egid) && gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; ok: rcu_read_unlock(); /* * If a task drops privileges and becomes nondumpable (through a syscall * like setresuid()) while we are trying to access it, we must ensure * that the dumpability is read after the credentials; otherwise, * we may be able to attach to a task that we shouldn't be able to * attach to (as if the task had dropped privileges without becoming * nondumpable). * Pairs with a write barrier in commit_creds(). */ smp_rmb(); mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); } bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; task_lock(task); err = __ptrace_may_access(task, mode); task_unlock(task); return !err; } static int check_ptrace_options(unsigned long data) { if (data & ~(unsigned long)PTRACE_O_MASK) return -EINVAL; if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || !IS_ENABLED(CONFIG_SECCOMP)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || current->ptrace & PT_SUSPEND_SECCOMP) return -EPERM; } return 0; } static inline void ptrace_set_stopped(struct task_struct *task, bool seize) { guard(spinlock)(&task->sighand->siglock); /* SEIZE doesn't trap tracee on attach */ if (!seize) send_signal_locked(SIGSTOP, SEND_SIG_PRIV, task, PIDTYPE_PID); /* * If the task is already STOPPED, set JOBCTL_TRAP_STOP and * TRAPPING, and kick it so that it transits to TRACED. TRAPPING * will be cleared if the child completes the transition or any * event which clears the group stop states happens. We'll wait * for the transition to complete before returning from this * function. * * This hides STOPPED -> RUNNING -> TRACED transition from the * attaching thread but a different thread in the same group can * still observe the transient RUNNING state. IOW, if another * thread's WNOHANG wait(2) on the stopped tracee races against * ATTACH, the wait(2) may fail due to the transient RUNNING. * * The following task_is_stopped() test is safe as both transitions * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { task->jobctl &= ~JOBCTL_STOPPED; signal_wake_up_state(task, __TASK_STOPPED); } } static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) { bool seize = (request == PTRACE_SEIZE); int retval; if (seize) { if (addr != 0) return -EIO; /* * This duplicates the check in check_ptrace_options() because * ptrace_attach() and ptrace_setoptions() have historically * used different error codes for unknown ptrace options. */ if (flags & ~(unsigned long)PTRACE_O_MASK) return -EIO; retval = check_ptrace_options(flags); if (retval) return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; } audit_ptrace(task); if (unlikely(task->flags & PF_KTHREAD)) return -EPERM; if (same_thread_group(task, current)) return -EPERM; /* * Protect exec's credential calculations against our interference; * SUID, SGID and LSM creds get determined differently * under ptrace. */ scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, &task->signal->cred_guard_mutex) { scoped_guard (task_lock, task) { retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); if (retval) return retval; } scoped_guard (write_lock_irq, &tasklist_lock) { if (unlikely(task->exit_state)) return -EPERM; if (task->ptrace) return -EPERM; task->ptrace = flags; ptrace_link(task, current); ptrace_set_stopped(task, seize); } } /* * We do not bother to change retval or clear JOBCTL_TRAPPING * if wait_on_bit() was interrupted by SIGKILL. The tracer will * not return to user-mode, it will exit and clear this bit in * __ptrace_unlink() if it wasn't already cleared by the tracee; * and until then nobody can ptrace this task. */ wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); proc_ptrace_connector(task, PTRACE_ATTACH); return 0; } /** * ptrace_traceme -- helper for PTRACE_TRACEME * * Performs checks and sets PT_PTRACED. * Should be used by all ptrace implementations for PTRACE_TRACEME. */ static int ptrace_traceme(void) { int ret = -EPERM; write_lock_irq(&tasklist_lock); /* Are we already being traced? */ if (!current->ptrace) { ret = security_ptrace_traceme(current->parent); /* * Check PF_EXITING to ensure ->real_parent has not passed * exit_ptrace(). Otherwise we don't report the error but * pretend ->real_parent untraces us right after return. */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); return ret; } /* * Called with irqs disabled, returns true if childs should reap themselves. */ static int ignoring_children(struct sighand_struct *sigh) { int ret; spin_lock(&sigh->siglock); ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) || (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT); spin_unlock(&sigh->siglock); return ret; } /* * Called with tasklist_lock held for writing. * Unlink a traced task, and clean it up if it was a traced zombie. * Return true if it needs to be reaped with release_task(). * (We can't call release_task() here because we already hold tasklist_lock.) * * If it's a zombie, our attachedness prevented normal parent notification * or self-reaping. Do notification now if it would have happened earlier. * If it should reap itself, return true. * * If it's our own child, there is no notification to do. But if our normal * children self-reap, then this child was prevented by ptrace and we must * reap it now, in that case we must also wake up sub-threads sleeping in * do_wait(). */ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) { bool dead; __ptrace_unlink(p); if (p->exit_state != EXIT_ZOMBIE) return false; dead = !thread_group_leader(p); if (!dead && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, tracer)) dead = do_notify_parent(p, p->exit_signal); else if (ignoring_children(tracer->sighand)) { __wake_up_parent(p, tracer); dead = true; } } /* Mark it as in the process of being reaped. */ if (dead) p->exit_state = EXIT_DEAD; return dead; } static int ptrace_detach(struct task_struct *child, unsigned int data) { if (!valid_signal(data)) return -EIO; /* Architecture-specific hardware disable .. */ ptrace_disable(child); write_lock_irq(&tasklist_lock); /* * We rely on ptrace_freeze_traced(). It can't be killed and * untraced by another thread, it can't be a zombie. */ WARN_ON(!child->ptrace || child->exit_state); /* * tasklist_lock avoids the race with wait_task_stopped(), see * the comment in ptrace_resume(). */ child->exit_code = data; __ptrace_detach(current, child); write_unlock_irq(&tasklist_lock); proc_ptrace_connector(child, PTRACE_DETACH); return 0; } /* * Detach all tasks we were using ptrace on. Called with tasklist held * for writing. */ void exit_ptrace(struct task_struct *tracer, struct list_head *dead) { struct task_struct *p, *n; list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { if (unlikely(p->ptrace & PT_EXITKILL)) send_sig_info(SIGKILL, SEND_SIG_PRIV, p); if (__ptrace_detach(tracer, p)) list_add(&p->ptrace_entry, dead); } } int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) { int copied = 0; while (len > 0) { char buf[128]; int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); if (!retval) { if (copied) break; return -EIO; } if (copy_to_user(dst, buf, retval)) return -EFAULT; copied += retval; src += retval; dst += retval; len -= retval; } return copied; } int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len) { int copied = 0; while (len > 0) { char buf[128]; int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) break; return -EIO; } copied += retval; src += retval; dst += retval; len -= retval; } return copied; } static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; int ret; ret = check_ptrace_options(data); if (ret) return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); flags |= (data << PT_OPT_FLAG_SHIFT); child->ptrace = flags; return 0; } static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info) { unsigned long flags; int error = -ESRCH; if (lock_task_sighand(child, &flags)) { error = -EINVAL; if (likely(child->last_siginfo != NULL)) { copy_siginfo(info, child->last_siginfo); error = 0; } unlock_task_sighand(child, &flags); } return error; } static int ptrace_setsiginfo(struct task_struct *child, const kernel_siginfo_t *info) { unsigned long flags; int error = -ESRCH; if (lock_task_sighand(child, &flags)) { error = -EINVAL; if (likely(child->last_siginfo != NULL)) { copy_siginfo(child->last_siginfo, info); error = 0; } unlock_task_sighand(child, &flags); } return error; } static int ptrace_peek_siginfo(struct task_struct *child, unsigned long addr, unsigned long data) { struct ptrace_peeksiginfo_args arg; struct sigpending *pending; struct sigqueue *q; int ret, i; ret = copy_from_user(&arg, (void __user *) addr, sizeof(struct ptrace_peeksiginfo_args)); if (ret) return -EFAULT; if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED) return -EINVAL; /* unknown flags */ if (arg.nr < 0) return -EINVAL; /* Ensure arg.off fits in an unsigned long */ if (arg.off > ULONG_MAX) return 0; if (arg.flags & PTRACE_PEEKSIGINFO_SHARED) pending = &child->signal->shared_pending; else pending = &child->pending; for (i = 0; i < arg.nr; ) { kernel_siginfo_t info; unsigned long off = arg.off + i; bool found = false; spin_lock_irq(&child->sighand->siglock); list_for_each_entry(q, &pending->list, list) { if (!off--) { found = true; copy_siginfo(&info, &q->info); break; } } spin_unlock_irq(&child->sighand->siglock); if (!found) /* beyond the end of the list */ break; #ifdef CONFIG_COMPAT if (unlikely(in_compat_syscall())) { compat_siginfo_t __user *uinfo = compat_ptr(data); if (copy_siginfo_to_user32(uinfo, &info)) { ret = -EFAULT; break; } } else #endif { siginfo_t __user *uinfo = (siginfo_t __user *) data; if (copy_siginfo_to_user(uinfo, &info)) { ret = -EFAULT; break; } } data += sizeof(siginfo_t); i++; if (signal_pending(current)) break; cond_resched(); } if (i > 0) return i; return ret; } #ifdef CONFIG_RSEQ static long ptrace_get_rseq_configuration(struct task_struct *task, unsigned long size, void __user *data) { struct ptrace_rseq_configuration conf = { .rseq_abi_pointer = (u64)(uintptr_t)task->rseq, .rseq_abi_size = task->rseq_len, .signature = task->rseq_sig, .flags = 0, }; size = min_t(unsigned long, size, sizeof(conf)); if (copy_to_user(data, &conf, size)) return -EFAULT; return sizeof(conf); } #endif #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) #ifdef PTRACE_SINGLEBLOCK #define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) #else #define is_singleblock(request) 0 #endif #ifdef PTRACE_SYSEMU #define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP) #else #define is_sysemu_singlestep(request) 0 #endif static int ptrace_resume(struct task_struct *child, long request, unsigned long data) { if (!valid_signal(data)) return -EIO; if (request == PTRACE_SYSCALL) set_task_syscall_work(child, SYSCALL_TRACE); else clear_task_syscall_work(child, SYSCALL_TRACE); #if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU) if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP) set_task_syscall_work(child, SYSCALL_EMU); else clear_task_syscall_work(child, SYSCALL_EMU); #endif if (is_singleblock(request)) { if (unlikely(!arch_has_block_step())) return -EIO; user_enable_block_step(child); } else if (is_singlestep(request) || is_sysemu_singlestep(request)) { if (unlikely(!arch_has_single_step())) return -EIO; user_enable_single_step(child); } else { user_disable_single_step(child); } /* * Change ->exit_code and ->state under siglock to avoid the race * with wait_task_stopped() in between; a non-zero ->exit_code will * wrongly look like another report from tracee. * * Note that we need siglock even if ->exit_code == data and/or this * status was not reported yet, the new status must not be cleared by * wait_task_stopped() after resume. */ spin_lock_irq(&child->sighand->siglock); child->exit_code = data; child->jobctl &= ~JOBCTL_TRACED; wake_up_state(child, __TASK_TRACED); spin_unlock_irq(&child->sighand->siglock); return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static const struct user_regset * find_regset(const struct user_regset_view *view, unsigned int type) { const struct user_regset *regset; int n; for (n = 0; n < view->n; ++n) { regset = view->regsets + n; if (regset->core_note_type == type) return regset; } return NULL; } static int ptrace_regset(struct task_struct *task, int req, unsigned int type, struct iovec *kiov) { const struct user_regset_view *view = task_user_regset_view(task); const struct user_regset *regset = find_regset(view, type); int regset_no; if (!regset || (kiov->iov_len % regset->size) != 0) return -EINVAL; regset_no = regset - view->regsets; kiov->iov_len = min(kiov->iov_len, (__kernel_size_t) (regset->n * regset->size)); if (req == PTRACE_GETREGSET) return copy_regset_to_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); else return copy_regset_from_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); } /* * This is declared in linux/regset.h and defined in machine-dependent * code. We put the export here, near the primary machine-neutral use, * to ensure no machine forgets it. */ EXPORT_SYMBOL_GPL(task_user_regset_view); static unsigned long ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { unsigned long args[ARRAY_SIZE(info->entry.args)]; int i; info->entry.nr = syscall_get_nr(child, regs); syscall_get_arguments(child, regs, args); for (i = 0; i < ARRAY_SIZE(args); i++) info->entry.args[i] = args[i]; /* args is the last field in struct ptrace_syscall_info.entry */ return offsetofend(struct ptrace_syscall_info, entry.args); } static unsigned long ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { /* * As struct ptrace_syscall_info.entry is currently a subset * of struct ptrace_syscall_info.seccomp, it makes sense to * initialize that subset using ptrace_get_syscall_info_entry(). * This can be reconsidered in the future if these structures * diverge significantly enough. */ ptrace_get_syscall_info_entry(child, regs, info); info->seccomp.ret_data = child->ptrace_message; /* * ret_data is the last non-reserved field * in struct ptrace_syscall_info.seccomp */ return offsetofend(struct ptrace_syscall_info, seccomp.ret_data); } static unsigned long ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { info->exit.rval = syscall_get_error(child, regs); info->exit.is_error = !!info->exit.rval; if (!info->exit.is_error) info->exit.rval = syscall_get_return_value(child, regs); /* is_error is the last field in struct ptrace_syscall_info.exit */ return offsetofend(struct ptrace_syscall_info, exit.is_error); } static int ptrace_get_syscall_info_op(struct task_struct *child) { /* * This does not need lock_task_sighand() to access * child->last_siginfo because ptrace_freeze_traced() * called earlier by ptrace_check_attach() ensures that * the tracee cannot go away and clear its last_siginfo. */ switch (child->last_siginfo ? child->last_siginfo->si_code : 0) { case SIGTRAP | 0x80: switch (child->ptrace_message) { case PTRACE_EVENTMSG_SYSCALL_ENTRY: return PTRACE_SYSCALL_INFO_ENTRY; case PTRACE_EVENTMSG_SYSCALL_EXIT: return PTRACE_SYSCALL_INFO_EXIT; default: return PTRACE_SYSCALL_INFO_NONE; } case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8): return PTRACE_SYSCALL_INFO_SECCOMP; default: return PTRACE_SYSCALL_INFO_NONE; } } static int ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, void __user *datavp) { struct pt_regs *regs = task_pt_regs(child); struct ptrace_syscall_info info = { .op = ptrace_get_syscall_info_op(child), .arch = syscall_get_arch(child), .instruction_pointer = instruction_pointer(regs), .stack_pointer = user_stack_pointer(regs), }; unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); unsigned long write_size; switch (info.op) { case PTRACE_SYSCALL_INFO_ENTRY: actual_size = ptrace_get_syscall_info_entry(child, regs, &info); break; case PTRACE_SYSCALL_INFO_EXIT: actual_size = ptrace_get_syscall_info_exit(child, regs, &info); break; case PTRACE_SYSCALL_INFO_SECCOMP: actual_size = ptrace_get_syscall_info_seccomp(child, regs, &info); break; } write_size = min(actual_size, user_size); return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size; } static int ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { unsigned long args[ARRAY_SIZE(info->entry.args)]; int nr = info->entry.nr; int i; /* * Check that the syscall number specified in info->entry.nr * is either a value of type "int" or a sign-extended value * of type "int". */ if (nr != info->entry.nr) return -ERANGE; for (i = 0; i < ARRAY_SIZE(args); i++) { args[i] = info->entry.args[i]; /* * Check that the syscall argument specified in * info->entry.args[i] is either a value of type * "unsigned long" or a sign-extended value of type "long". */ if (args[i] != info->entry.args[i]) return -ERANGE; } syscall_set_nr(child, regs, nr); /* * If the syscall number is set to -1, setting syscall arguments is not * just pointless, it would also clobber the syscall return value on * those architectures that share the same register both for the first * argument of syscall and its return value. */ if (nr != -1) syscall_set_arguments(child, regs, args); return 0; } static int ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { /* * info->entry is currently a subset of info->seccomp, * info->seccomp.ret_data is currently ignored. */ return ptrace_set_syscall_info_entry(child, regs, info); } static int ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, struct ptrace_syscall_info *info) { long rval = info->exit.rval; /* * Check that the return value specified in info->exit.rval * is either a value of type "long" or a sign-extended value * of type "long". */ if (rval != info->exit.rval) return -ERANGE; if (info->exit.is_error) syscall_set_return_value(child, regs, rval, 0); else syscall_set_return_value(child, regs, 0, rval); return 0; } static int ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size, const void __user *datavp) { struct pt_regs *regs = task_pt_regs(child); struct ptrace_syscall_info info; if (user_size < sizeof(info)) return -EINVAL; /* * The compatibility is tracked by info.op and info.flags: if user-space * does not instruct us to use unknown extra bits from future versions * of ptrace_syscall_info, we are not going to read them either. */ if (copy_from_user(&info, datavp, sizeof(info))) return -EFAULT; /* Reserved for future use. */ if (info.flags || info.reserved) return -EINVAL; /* Changing the type of the system call stop is not supported yet. */ if (ptrace_get_syscall_info_op(child) != info.op) return -EINVAL; switch (info.op) { case PTRACE_SYSCALL_INFO_ENTRY: return ptrace_set_syscall_info_entry(child, regs, &info); case PTRACE_SYSCALL_INFO_EXIT: return ptrace_set_syscall_info_exit(child, regs, &info); case PTRACE_SYSCALL_INFO_SECCOMP: return ptrace_set_syscall_info_seccomp(child, regs, &info); default: /* Other types of system call stops are not supported yet. */ return -EINVAL; } } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data) { bool seized = child->ptrace & PT_SEIZED; int ret = -EIO; kernel_siginfo_t siginfo, *si; void __user *datavp = (void __user *) data; unsigned long __user *datalp = datavp; unsigned long flags; switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: return generic_ptrace_peekdata(child, addr, data); case PTRACE_POKETEXT: case PTRACE_POKEDATA: return generic_ptrace_pokedata(child, addr, data); #ifdef PTRACE_OLDSETOPTIONS case PTRACE_OLDSETOPTIONS: #endif case PTRACE_SETOPTIONS: ret = ptrace_setoptions(child, data); break; case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message, datalp); break; case PTRACE_PEEKSIGINFO: ret = ptrace_peek_siginfo(child, addr, data); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) ret = copy_siginfo_to_user(datavp, &siginfo); break; case PTRACE_SETSIGINFO: ret = copy_siginfo_from_user(&siginfo, datavp); if (!ret) ret = ptrace_setsiginfo(child, &siginfo); break; case PTRACE_GETSIGMASK: { sigset_t *mask; if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } if (test_tsk_restore_sigmask(child)) mask = &child->saved_sigmask; else mask = &child->blocked; if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; } case PTRACE_SETSIGMASK: { sigset_t new_set; if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } if (copy_from_user(&new_set, datavp, sizeof(sigset_t))) { ret = -EFAULT; break; } sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); /* * Every thread does recalc_sigpending() after resume, so * retarget_shared_pending() and recalc_sigpending() are not * called here. */ spin_lock_irq(&child->sighand->siglock); child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); clear_tsk_restore_sigmask(child); ret = 0; break; } case PTRACE_INTERRUPT: /* * Stop tracee without any side-effect on signal or job * control. At least one trap is guaranteed to happen * after this request. If @child is already trapped, the * current trap is not disturbed and another trap will * happen after the current trap is ended with PTRACE_CONT. * * The actual trap might not be PTRACE_EVENT_STOP trap but * the pending condition is cleared regardless. */ if (unlikely(!seized || !lock_task_sighand(child, &flags))) break; /* * INTERRUPT doesn't disturb existing trap sans one * exception. If ptracer issued LISTEN for the current * STOP, this INTERRUPT should clear LISTEN and re-trap * tracee into STOP. */ if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); unlock_task_sighand(child, &flags); ret = 0; break; case PTRACE_LISTEN: /* * Listen for events. Tracee must be in STOP. It's not * resumed per-se but is not considered to be in TRACED by * wait(2) or ptrace(2). If an async event (e.g. group * stop state change) happens, tracee will enter STOP trap * again. Alternatively, ptracer can issue INTERRUPT to * finish listening and re-trap tracee into STOP. */ if (unlikely(!seized || !lock_task_sighand(child, &flags))) break; si = child->last_siginfo; if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) { child->jobctl |= JOBCTL_LISTENING; /* * If NOTIFY is set, it means event happened between * start of this trap and now. Trigger re-trap. */ if (child->jobctl & JOBCTL_TRAP_NOTIFY) ptrace_signal_wake_up(child, true); ret = 0; } unlock_task_sighand(child, &flags); break; case PTRACE_DETACH: /* detach a process that was attached. */ ret = ptrace_detach(child, data); break; #ifdef CONFIG_BINFMT_ELF_FDPIC case PTRACE_GETFDPIC: { struct mm_struct *mm = get_task_mm(child); unsigned long tmp = 0; ret = -ESRCH; if (!mm) break; switch (addr) { case PTRACE_GETFDPIC_EXEC: tmp = mm->context.exec_fdpic_loadmap; break; case PTRACE_GETFDPIC_INTERP: tmp = mm->context.interp_fdpic_loadmap; break; default: break; } mmput(mm); ret = put_user(tmp, datalp); break; } #endif case PTRACE_SINGLESTEP: #ifdef PTRACE_SINGLEBLOCK case PTRACE_SINGLEBLOCK: #endif #ifdef PTRACE_SYSEMU case PTRACE_SYSEMU: case PTRACE_SYSEMU_SINGLESTEP: #endif case PTRACE_SYSCALL: case PTRACE_CONT: return ptrace_resume(child, request, data); case PTRACE_KILL: send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: case PTRACE_SETREGSET: { struct iovec kiov; struct iovec __user *uiov = datavp; if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(kiov.iov_base, &uiov->iov_base) || __get_user(kiov.iov_len, &uiov->iov_len)) return -EFAULT; ret = ptrace_regset(child, request, addr, &kiov); if (!ret) ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } case PTRACE_GET_SYSCALL_INFO: ret = ptrace_get_syscall_info(child, addr, datavp); break; case PTRACE_SET_SYSCALL_INFO: ret = ptrace_set_syscall_info(child, addr, datavp); break; #endif case PTRACE_SECCOMP_GET_FILTER: ret = seccomp_get_filter(child, addr, datavp); break; case PTRACE_SECCOMP_GET_METADATA: ret = seccomp_get_metadata(child, addr, datavp); break; #ifdef CONFIG_RSEQ case PTRACE_GET_RSEQ_CONFIGURATION: ret = ptrace_get_rseq_configuration(child, addr, datavp); break; #endif case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG: ret = syscall_user_dispatch_set_config(child, addr, datavp); break; case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG: ret = syscall_user_dispatch_get_config(child, addr, datavp); break; default: break; } return ret; } SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, unsigned long, data) { struct task_struct *child; long ret; if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); goto out; } child = find_get_task_by_vpid(pid); if (!child) { ret = -ESRCH; goto out; } if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); goto out_put_task_struct; } ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); if (ret < 0) goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); if (ret || request != PTRACE_DETACH) ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); out: return ret; } int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long data) { unsigned long tmp; int copied; copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); } int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, unsigned long data) { int copied; copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } #if defined CONFIG_COMPAT int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data) { compat_ulong_t __user *datap = compat_ptr(data); compat_ulong_t word; kernel_siginfo_t siginfo; int ret; switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; else ret = put_user(word, datap); break; case PTRACE_POKETEXT: case PTRACE_POKEDATA: ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; case PTRACE_GETEVENTMSG: ret = put_user((compat_ulong_t) child->ptrace_message, datap); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) ret = copy_siginfo_to_user32( (struct compat_siginfo __user *) datap, &siginfo); break; case PTRACE_SETSIGINFO: ret = copy_siginfo_from_user32( &siginfo, (struct compat_siginfo __user *) datap); if (!ret) ret = ptrace_setsiginfo(child, &siginfo); break; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: case PTRACE_SETREGSET: { struct iovec kiov; struct compat_iovec __user *uiov = (struct compat_iovec __user *) datap; compat_uptr_t ptr; compat_size_t len; if (!access_ok(uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(ptr, &uiov->iov_base) || __get_user(len, &uiov->iov_len)) return -EFAULT; kiov.iov_base = compat_ptr(ptr); kiov.iov_len = len; ret = ptrace_regset(child, request, addr, &kiov); if (!ret) ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } #endif default: ret = ptrace_request(child, request, addr, data); } return ret; } COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, compat_long_t, addr, compat_long_t, data) { struct task_struct *child; long ret; if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); goto out; } child = find_get_task_by_vpid(pid); if (!child) { ret = -ESRCH; goto out; } if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { ret = ptrace_attach(child, request, addr, data); goto out_put_task_struct; } ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); if (ret || request != PTRACE_DETACH) ptrace_unfreeze_traced(child); } out_put_task_struct: put_task_struct(child); out: return ret; } #endif /* CONFIG_COMPAT */
23 23 14 14 14 14 13 13 7 12 20 23 23 23 3 19 20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some microsoft "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #define MS_HIDINPUT BIT(0) #define MS_ERGONOMY BIT(1) #define MS_PRESENTER BIT(2) #define MS_RDESC BIT(3) #define MS_NOGET BIT(4) #define MS_DUPLICATE_USAGES BIT(5) #define MS_SURFACE_DIAL BIT(6) #define MS_QUIRK_FF BIT(7) struct ms_data { unsigned long quirks; struct hid_device *hdev; struct work_struct ff_worker; __u8 strong; __u8 weak; void *output_report_dmabuf; }; #define XB1S_FF_REPORT 3 #define ENABLE_WEAK BIT(0) #define ENABLE_STRONG BIT(1) enum { MAGNITUDE_STRONG = 2, MAGNITUDE_WEAK, MAGNITUDE_NUM }; struct xb1s_ff_report { __u8 report_id; __u8 enable; __u8 magnitude[MAGNITUDE_NUM]; __u8 duration_10ms; __u8 start_delay_10ms; __u8 loop_count; } __packed; static const __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; /* * Microsoft Wireless Desktop Receiver (Model 1028) has * 'Usage Min/Max' where it ought to have 'Physical Min/Max' */ if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && rdesc[559] == 0x29) { hid_info(hdev, "fixing up Microsoft Wireless Receiver Model 1028 report descriptor\n"); rdesc[557] = 0x35; rdesc[559] = 0x45; } return rdesc; } #define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int ms_ergonomy_kb_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { struct input_dev *input = hi->input; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER) { switch (usage->hid & HID_USAGE) { /* * Microsoft uses these 2 reserved usage ids for 2 keys on * the MS office kb labelled "Office Home" and "Task Pane". */ case 0x29d: ms_map_key_clear(KEY_PROG1); return 1; case 0x29e: ms_map_key_clear(KEY_PROG2); return 1; } return 0; } if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; switch (usage->hid & HID_USAGE) { case 0xfd06: ms_map_key_clear(KEY_CHAT); break; case 0xfd07: ms_map_key_clear(KEY_PHONE); break; case 0xff00: /* Special keypad keys */ ms_map_key_clear(KEY_KPEQUAL); set_bit(KEY_KPLEFTPAREN, input->keybit); set_bit(KEY_KPRIGHTPAREN, input->keybit); break; case 0xff01: /* Scroll wheel */ hid_map_usage_clear(hi, usage, bit, max, EV_REL, REL_WHEEL); break; case 0xff02: /* * This byte contains a copy of the modifier keys byte of a * standard hid keyboard report, as send by interface 0 * (this usage is found on interface 1). * * This byte only gets send when another key in the same report * changes state, and as such is useless, ignore it. */ return -1; case 0xff05: set_bit(EV_REP, input->evbit); ms_map_key_clear(KEY_F13); set_bit(KEY_F14, input->keybit); set_bit(KEY_F15, input->keybit); set_bit(KEY_F16, input->keybit); set_bit(KEY_F17, input->keybit); set_bit(KEY_F18, input->keybit); break; default: return 0; } return 1; } static int ms_presenter_8k_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; set_bit(EV_REP, hi->input->evbit); switch (usage->hid & HID_USAGE) { case 0xfd08: ms_map_key_clear(KEY_FORWARD); break; case 0xfd09: ms_map_key_clear(KEY_BACK); break; case 0xfd0b: ms_map_key_clear(KEY_PLAYPAUSE); break; case 0xfd0e: ms_map_key_clear(KEY_CLOSE); break; case 0xfd0f: ms_map_key_clear(KEY_PLAY); break; default: return 0; } return 1; } static int ms_surface_dial_quirk(struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { switch (usage->hid & HID_USAGE_PAGE) { case 0xff070000: case HID_UP_DIGITIZER: /* ignore those axis */ return -1; case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: case HID_GD_Y: case HID_GD_RFKILL_BTN: /* ignore those axis */ return -1; } } return 0; } static int ms_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_ERGONOMY) { int ret = ms_ergonomy_kb_quirk(hi, usage, bit, max); if (ret) return ret; } if ((quirks & MS_PRESENTER) && ms_presenter_8k_quirk(hi, usage, bit, max)) return 1; if (quirks & MS_SURFACE_DIAL) { int ret = ms_surface_dial_quirk(hi, field, usage, bit, max); if (ret) return ret; } return 0; } static int ms_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_DUPLICATE_USAGES) clear_bit(usage->code, *bit); return 0; } static int ms_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; struct input_dev *input; if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput || !usage->type) return 0; input = field->hidinput->input; /* Handling MS keyboards special buttons */ if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff00)) { /* Special keypad keys */ input_report_key(input, KEY_KPEQUAL, value & 0x01); input_report_key(input, KEY_KPLEFTPAREN, value & 0x02); input_report_key(input, KEY_KPRIGHTPAREN, value & 0x04); return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff01)) { /* Scroll wheel */ int step = ((value & 0x60) >> 5) + 1; switch (value & 0x1f) { case 0x01: input_report_rel(input, REL_WHEEL, step); break; case 0x1f: input_report_rel(input, REL_WHEEL, -step); break; } return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff05)) { static unsigned int last_key = 0; unsigned int key = 0; switch (value) { case 0x01: key = KEY_F14; break; case 0x02: key = KEY_F15; break; case 0x04: key = KEY_F16; break; case 0x08: key = KEY_F17; break; case 0x10: key = KEY_F18; break; } if (key) { input_event(input, usage->type, key, 1); last_key = key; } else input_event(input, usage->type, last_key, 0); return 1; } return 0; } static void ms_ff_worker(struct work_struct *work) { struct ms_data *ms = container_of(work, struct ms_data, ff_worker); struct hid_device *hdev = ms->hdev; struct xb1s_ff_report *r = ms->output_report_dmabuf; int ret; memset(r, 0, sizeof(*r)); r->report_id = XB1S_FF_REPORT; r->enable = ENABLE_WEAK | ENABLE_STRONG; /* * Specifying maximum duration and maximum loop count should * cover maximum duration of a single effect, which is 65536 * ms */ r->duration_10ms = U8_MAX; r->loop_count = U8_MAX; r->magnitude[MAGNITUDE_STRONG] = ms->strong; /* left actuator */ r->magnitude[MAGNITUDE_WEAK] = ms->weak; /* right actuator */ ret = hid_hw_output_report(hdev, (__u8 *)r, sizeof(*r)); if (ret < 0) hid_warn(hdev, "failed to send FF report\n"); } static int ms_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct ms_data *ms = hid_get_drvdata(hid); if (effect->type != FF_RUMBLE) return 0; /* * Magnitude is 0..100 so scale the 16-bit input here */ ms->strong = ((u32) effect->u.rumble.strong_magnitude * 100) / U16_MAX; ms->weak = ((u32) effect->u.rumble.weak_magnitude * 100) / U16_MAX; schedule_work(&ms->ff_worker); return 0; } static int ms_init_ff(struct hid_device *hdev) { struct hid_input *hidinput; struct input_dev *input_dev; struct ms_data *ms = hid_get_drvdata(hdev); if (list_empty(&hdev->inputs)) { hid_err(hdev, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hdev->inputs.next, struct hid_input, list); input_dev = hidinput->input; if (!(ms->quirks & MS_QUIRK_FF)) return 0; ms->hdev = hdev; INIT_WORK(&ms->ff_worker, ms_ff_worker); ms->output_report_dmabuf = devm_kzalloc(&hdev->dev, sizeof(struct xb1s_ff_report), GFP_KERNEL); if (ms->output_report_dmabuf == NULL) return -ENOMEM; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, ms_play_effect); } static void ms_remove_ff(struct hid_device *hdev) { struct ms_data *ms = hid_get_drvdata(hdev); if (!(ms->quirks & MS_QUIRK_FF)) return; cancel_work_sync(&ms->ff_worker); } static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; struct ms_data *ms; int ret; ms = devm_kzalloc(&hdev->dev, sizeof(*ms), GFP_KERNEL); if (ms == NULL) return -ENOMEM; ms->quirks = quirks; hid_set_drvdata(hdev, ms); if (quirks & MS_NOGET) hdev->quirks |= HID_QUIRK_NOGET; if (quirks & MS_SURFACE_DIAL) hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | ((quirks & MS_HIDINPUT) ? HID_CONNECT_HIDINPUT_FORCE : 0)); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } ret = ms_init_ff(hdev); if (ret) hid_err(hdev, "could not initialize ff, continuing anyway"); return 0; err_free: return ret; } static void ms_remove(struct hid_device *hdev) { hid_hw_stop(hdev); ms_remove_ff(hdev); } static const struct hid_device_id ms_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K_JP), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K), .driver_data = MS_ERGONOMY | MS_RDESC }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB), .driver_data = MS_PRESENTER }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0), .driver_data = MS_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500), .driver_data = MS_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD), .driver_data = MS_ERGONOMY}, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT), .driver_data = MS_PRESENTER }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, 0x091B), .driver_data = MS_SURFACE_DIAL }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1914), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS), .driver_data = MS_QUIRK_FF }, { } }; MODULE_DEVICE_TABLE(hid, ms_devices); static struct hid_driver ms_driver = { .name = "microsoft", .id_table = ms_devices, .report_fixup = ms_report_fixup, .input_mapping = ms_input_mapping, .input_mapped = ms_input_mapped, .event = ms_event, .probe = ms_probe, .remove = ms_remove, }; module_hid_driver(ms_driver); MODULE_DESCRIPTION("HID driver for some microsoft \"special\" devices"); MODULE_LICENSE("GPL");
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 // SPDX-License-Identifier: GPL-2.0 #include <linux/utsname.h> #include <net/cfg80211.h> #include "core.h" #include "rdev-ops.h" void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct device *pdev = wiphy_dev(wdev->wiphy); if (pdev->driver) strscpy(info->driver, pdev->driver->name, sizeof(info->driver)); else strscpy(info->driver, "N/A", sizeof(info->driver)); strscpy(info->version, init_utsname()->release, sizeof(info->version)); if (wdev->wiphy->fw_version[0]) strscpy(info->fw_version, wdev->wiphy->fw_version, sizeof(info->fw_version)); else strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); strscpy(info->bus_info, dev_name(pdev), sizeof(info->bus_info)); } EXPORT_SYMBOL(cfg80211_get_drvinfo);
3 3 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 // SPDX-License-Identifier: GPL-2.0 /* * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum * (SSE2 accelerated version) * * Copyright 2018 Google LLC */ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> #include <linux/sizes.h> #include <asm/simd.h> asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2); kernel_fpu_end(); src += n; srclen -= n; } while (srclen); return 0; } static int nhpoly1305_sse2_digest(struct shash_desc *desc, const u8 *src, unsigned int srclen, u8 *out) { return crypto_nhpoly1305_init(desc) ?: nhpoly1305_sse2_update(desc, src, srclen) ?: crypto_nhpoly1305_final(desc, out); } static struct shash_alg nhpoly1305_alg = { .base.cra_name = "nhpoly1305", .base.cra_driver_name = "nhpoly1305-sse2", .base.cra_priority = 200, .base.cra_ctxsize = sizeof(struct nhpoly1305_key), .base.cra_module = THIS_MODULE, .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_nhpoly1305_init, .update = nhpoly1305_sse2_update, .final = crypto_nhpoly1305_final, .digest = nhpoly1305_sse2_digest, .setkey = crypto_nhpoly1305_setkey, .descsize = sizeof(struct nhpoly1305_state), }; static int __init nhpoly1305_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2)) return -ENODEV; return crypto_register_shash(&nhpoly1305_alg); } static void __exit nhpoly1305_mod_exit(void) { crypto_unregister_shash(&nhpoly1305_alg); } module_init(nhpoly1305_mod_init); module_exit(nhpoly1305_mod_exit); MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); MODULE_ALIAS_CRYPTO("nhpoly1305"); MODULE_ALIAS_CRYPTO("nhpoly1305-sse2");
32 422 429 425 39 20 426 335 1 339 425 421 344 74 7 417 5 423 422 419 421 423 420 42 3 4 426 49 31 390 421 430 40 40 40 40 44 43 6 4 34 32 37 1 36 6 41 391 391 396 394 392 242 240 241 243 62 173 10 2 8 180 241 241 242 240 242 241 2 188 31 31 31 31 31 35 30 5 19 19 18 19 19 17 18 13 12 13 14 14 6 13 14 14 14 30 31 31 11 21 20 19 19 1 18 17 18 2 17 18 1 1 11 14 4 128 7 115 6 176 173 175 173 2 73 35 6 90 18 7 5 8 4 6 4 150 5 168 1 2 171 166 21 158 20 21 161 161 19 145 60 120 161 160 1 163 55 4 162 141 22 121 61 27 3 2 1 1 1 4 19 19 19 19 19 18 4 117 70 1 10 10 9 1 159 159 1 120 40 41 150 7 1 95 5 21 77 92 166 2 163 163 21 6 130 158 24 127 38 1 60 102 161 129 38 161 164 163 139 9 29 9 91 160 158 20 70 1 73 19 13 13 86 3 1 6 82 3 5 5 5 5 5 4 2 3 3 3 2 1 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 // SPDX-License-Identifier: GPL-2.0-only /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * The Internet Protocol (IP) output module. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <Alan.Cox@linux.org> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Hirokazu Takahashi, <taka@valinux.co.jp> * * See ip_input.c for original log * * Fixes: * Alan Cox : Missing nonblock feature in ip_build_xmit. * Mike Kilburn : htons() missing in ip_build_xmit. * Bradford Johnson: Fix faulty handling of some frames when * no route is found. * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit * (in case if packet not accepted by * output firewall rules) * Mike McLagan : Routing by source * Alexey Kuznetsov: use new route cache * Andi Kleen: Fix broken PMTU recovery and remove * some redundant tests. * Vitaly E. Lavrov : Transparent proxy revived after year coma. * Andi Kleen : Replace ip_reply with ip_send_reply. * Andi Kleen : Split fast and slow ip_build_xmit path * for decreased register pressure on x86 * and more readability. * Marc Boucher : When call_out_firewall returns FW_QUEUE, * silently drop skb instead of failing with -EPERM. * Detlev Wengorz : Copy protocol for fragments. * Hirokazu Takahashi: HW checksumming for outgoing UDP * datagrams. * Hirokazu Takahashi: sendfile() on UDP works now. */ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <net/flow.h> #include <net/snmp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/xfrm.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/arp.h> #include <net/icmp.h> #include <net/checksum.h> #include <net/gso.h> #include <net/inetpeer.h> #include <net/lwtunnel.h> #include <net/inet_dscp.h> #include <linux/bpf-cgroup.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> #include <linux/netlink.h> #include <linux/tcp.h> #include <net/psp.h> static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu, int (*output)(struct net *, struct sock *, struct sk_buff *)); /* Generate a checksum for an outgoing IP datagram. */ void ip_send_check(struct iphdr *iph) { iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } EXPORT_SYMBOL(ip_send_check); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS); iph_set_totlen(iph, skb->len); ip_send_check(iph); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip_out(sk, skb); if (unlikely(!skb)) return 0; skb->protocol = htons(ETH_P_IP); return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst_dev(skb), dst_output); } int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(net, sk, skb); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } EXPORT_SYMBOL_GPL(ip_local_out); static inline int ip_select_ttl(const struct inet_sock *inet, const struct dst_entry *dst) { int ttl = READ_ONCE(inet->uc_ttl); if (ttl < 0) ttl = ip4_dst_hoplimit(dst); return ttl; } /* * Add an ip header to a skbuff and send it out. * */ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos) { const struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct net *net = sock_net(sk); struct iphdr *iph; /* Build the IP header. */ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; iph->tos = tos; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; /* Do not bother generating IPID for small packets (eg SYNACK) */ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; /* TCP packets here are SYNACK with fat IPv4/TCP options. * Avoid using the hashed IP ident generator. */ if (sk->sk_protocol == IPPROTO_TCP) iph->id = (__force __be16)get_random_u16(); else __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; ip_options_build(skb, &opt->opt, daddr, rt); } skb->priority = READ_ONCE(sk->sk_priority); if (!skb->mark) skb->mark = READ_ONCE(sk->sk_mark); /* Send it out. */ return ip_local_out(net, skb->sk, skb); } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst_dev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); /* OUTOCTETS should be counted after fragment */ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) return -ENOMEM; } if (lwtunnel_xmit_redirect(dst->lwtstate)) { int res = lwtunnel_xmit(skb); if (res != LWTUNNEL_XMIT_CONTINUE) return res; } rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ res = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock(); return res; } rcu_read_unlock(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return PTR_ERR(neigh); } static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { struct sk_buff *segs, *nskb; netdev_features_t features; int ret = 0; /* common case: seglen is <= mtu */ if (skb_gso_validate_network_len(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length exceeds the egress MTU. * * This can happen in several cases: * - Forwarding of a TCP GRO skb, when DF flag is not set. * - Forwarding of an skb that arrived on a virtualization interface * (virtio-net/vhost/tap) with TSO/GSO size set by other network * stack. * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an * interface with a smaller MTU. * - Arriving GRO skb (or GSO skb in a virtualized environment) that is * bridged to a NETIF_F_TSO tunnel stacked over an interface with an * insufficient MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } consume_skb(skb); skb_list_walk_safe(segs, segs, nskb) { int err; skb_mark_not_on_list(segs); err = ip_fragment(net, sk, segs, mtu, ip_finish_output2); if (err && ret == 0) ret = err; } return ret; } static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); } #endif mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); if (skb->len > mtu || IPCB(skb)->frag_max_size) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); } static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { int ret; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: return __ip_finish_output(net, sk, skb); case NET_XMIT_CN: return __ip_finish_output(net, sk, skb) ? : ret; default: kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } static int ip_mc_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *new_rt; bool do_cn = false; int ret, err; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_CN: do_cn = true; fallthrough; case NET_XMIT_SUCCESS: break; default: kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten, * see ipv4_pktinfo_prepare(). */ new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb)); if (new_rt) { new_rt->rt_iif = 0; skb_dst_drop(skb); skb_dst_set(skb, &new_rt->dst); } err = dev_loopback_xmit(net, sk, skb); return (do_cn && err) ? ret : err; } int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; /* * If the indicated interface is up and running, send the packet. */ skb->dev = dev; skb->protocol = htons(ETH_P_IP); /* * Multicasts are looped back for other local users */ if (rt->rt_flags&RTCF_MULTICAST) { if (sk_mc_loop(sk) #ifdef CONFIG_IP_MROUTE /* Small optimization: do not loopback not local frames, which returned after forwarding; they will be dropped by ip_mr_input in any case. Note, that local frames are looped back to be delivered to local recipients. This check is duplicated in ip_mr_input at the moment. */ && ((rt->rt_flags & RTCF_LOCAL) || !(IPCB(skb)->flags & IPSKB_FORWARDED)) #endif ) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, newskb, NULL, newskb->dev, ip_mc_finish_output); } /* Multicasts with ttl 0 must not go beyond the host */ if (ip_hdr(skb)->ttl == 0) { kfree_skb(skb); return 0; } } if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, newskb, NULL, newskb->dev, ip_mc_finish_output); } return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev, *indev = skb->dev; int ret_val; rcu_read_lock(); dev = skb_dst_dev_rcu(skb); skb->dev = dev; skb->protocol = htons(ETH_P_IP); ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, indev, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); rcu_read_unlock(); return ret_val; } EXPORT_SYMBOL(ip_output); /* * copy saddr and daddr, possibly using 64bit load/stores * Equivalent to : * iph->saddr = fl4->saddr; * iph->daddr = fl4->daddr; */ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) { BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); iph->saddr = fl4->saddr; iph->daddr = fl4->daddr; } /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; struct rtable *rt; struct iphdr *iph; int res; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. */ rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); fl4 = &fl->u.ip4; rt = skb_rtable(skb); if (rt) goto packet_routed; /* Make sure we can route this packet. */ rt = dst_rtable(__sk_dst_check(sk, 0)); if (!rt) { inet_sk_init_flowi4(inet, fl4); /* sctp_v4_xmit() uses its own DSCP value */ fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set_noref(skb, &rt->dst); packet_routed: if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff)); if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); /* Transport layer set skb->h.foo itself. */ if (inet_opt && inet_opt->opt.optlen) { iph->ihl += inet_opt->opt.optlen >> 2; ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt); } ip_select_ident_segs(net, skb, sk, skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; no_route: rcu_read_unlock(); IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES); return -EHOSTUNREACH; } EXPORT_SYMBOL(__ip_queue_xmit); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos)); } EXPORT_SYMBOL(ip_queue_xmit); static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; to->mark = from->mark; skb_copy_hash(to, from); #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif nf_copy(to, from); skb_ext_copy(to, from); #if IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif skb_copy_secmark(to, from); } static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct iphdr *iph = ip_hdr(skb); if ((iph->frag_off & htons(IP_DF)) == 0) return ip_do_fragment(net, sk, skb, output); if (unlikely(!skb->ignore_df || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); kfree_skb(skb); return -EMSGSIZE; } return ip_do_fragment(net, sk, skb, output); } void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, unsigned int hlen, struct ip_fraglist_iter *iter) { unsigned int first_len = skb_pagelen(skb); iter->frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); iter->offset = 0; iter->iph = iph; iter->hlen = hlen; skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); ip_send_check(iph); } EXPORT_SYMBOL(ip_fraglist_init); void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) { unsigned int hlen = iter->hlen; struct iphdr *iph = iter->iph; struct sk_buff *frag; frag = iter->frag; frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), iph, hlen); iter->iph = ip_hdr(frag); iph = iter->iph; iph->tot_len = htons(frag->len); ip_copy_metadata(frag, skb); iter->offset += skb->len - hlen; iph->frag_off = htons(iter->offset >> 3); if (frag->next) iph->frag_off |= htons(IP_MF); /* Ready, complete checksum */ ip_send_check(iph); } EXPORT_SYMBOL(ip_fraglist_prepare); void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state) { struct iphdr *iph = ip_hdr(skb); state->DF = DF; state->hlen = hlen; state->ll_rs = ll_rs; state->mtu = mtu; state->left = skb->len - hlen; /* Space per frame */ state->ptr = hlen; /* Where to start from */ state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; state->not_last_frag = iph->frag_off & htons(IP_MF); } EXPORT_SYMBOL(ip_frag_init); static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, bool first_frag) { /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; /* ANK: dirty, but effective trick. Upgrade options only if * the segment to be fragmented was THE FIRST (otherwise, * options are already fixed) and make it ONCE * on the initial skb, so that all the following fragments * will inherit fixed options. */ if (first_frag) ip_options_fragment(from); } struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state) { unsigned int len = state->left; struct sk_buff *skb2; struct iphdr *iph; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > state->mtu) len = state->mtu; /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < state->left) { len &= ~7; } /* Allocate buffer */ skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC); if (!skb2) return ERR_PTR(-ENOMEM); /* * Set up data on packet */ ip_copy_metadata(skb2, skb); skb_reserve(skb2, state->ll_rs); skb_put(skb2, len + state->hlen); skb_reset_network_header(skb2); skb2->transport_header = skb2->network_header + state->hlen; /* * Charge the memory for the fragment to any owner * it might possess */ if (skb->sk) skb_set_owner_w(skb2, skb->sk); /* * Copy the packet header into the new buffer. */ skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen); /* * Copy a block of the IP datagram. */ if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len)) BUG(); state->left -= len; /* * Fill in the new header fields. */ iph = ip_hdr(skb2); iph->frag_off = htons((state->offset >> 3)); if (state->DF) iph->frag_off |= htons(IP_DF); /* * Added AC : If we are fragmenting a fragment that's not the * last fragment then keep MF on each bit */ if (state->left > 0 || state->not_last_frag) iph->frag_off |= htons(IP_MF); state->ptr += len; state->offset += len; iph->tot_len = htons(len + state->hlen); ip_send_check(iph); return skb2; } EXPORT_SYMBOL(ip_frag_next); /* * This IP datagram is too large to be sent in one piece. Break it up into * smaller pieces (each of size equal to IP header plus * a block of the data of the original IP data part) that will yet fit in a * single device frame, and queue such a frame for sending. */ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct iphdr *iph; struct sk_buff *skb2; u8 tstamp_type = skb->tstamp_type; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; int err = 0; /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) goto fail; /* * Point into the IP datagram header. */ iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(sk, skb); if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) mtu = IPCB(skb)->frag_max_size; /* * Setup starting values. */ hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; ll_rs = LL_RESERVED_SPACE(rt->dst.dev); /* When frag_list is given, use it. First, check its validity: * some transformers could create wrong frag_list or break existing * one, it is not prohibited. In this case fall back to copying. * * LATER: this step can be merged to real generation of fragments, * we can switch to copy when see the first bad fragment. */ if (skb_has_frag_list(skb)) { struct sk_buff *frag, *frag2; unsigned int first_len = skb_pagelen(skb); if (first_len - hlen > mtu || ((first_len - hlen) & 7) || ip_is_fragment(iph) || skb_cloned(skb) || skb_headroom(skb) < ll_rs) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen + ll_rs) goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } skb->truesize -= frag->truesize; } /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { bool first_frag = (iter.offset == 0); IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); if (first_frag && IPCB(skb)->opt.optlen) { /* ipcb->opt is not populated for frags * coming from __ip_make_skb(), * ip_options_fragment() needs optlen */ IPCB(iter.frag)->opt.optlen = IPCB(skb)->opt.optlen; ip_options_fragment(iter.frag); ip_send_check(iter.iph); } } skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); if (err || !iter.frag) break; skb = ip_fraglist_next(&iter); } if (err == 0) { IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return 0; } kfree_skb_list(iter.frag); IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; slow_path_clean: skb_walk_frags(skb, frag2) { if (frag2 == frag) break; frag2->sk = NULL; frag2->destructor = NULL; skb->truesize += frag2->truesize; } } slow_path: /* * Fragment the datagram. */ ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU, &state); /* * Keep copying data until we run out. */ while (state.left > 0) { bool first_frag = (state.offset == 0); skb2 = ip_frag_next(skb, &state); if (IS_ERR(skb2)) { err = PTR_ERR(skb2); goto fail; } ip_frag_ipcb(skb, skb2, first_frag); /* * Put this fragment into the sending queue. */ skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, skb2); if (err) goto fail; IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); } consume_skb(skb); IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; } EXPORT_SYMBOL(ip_do_fragment); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { if (!copy_from_iter_full(to, len, &msg->msg_iter)) return -EFAULT; } else { __wsum csum = 0; if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter)) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } return 0; } EXPORT_SYMBOL(ip_generic_getfrag); static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ubuf_info *uarg = NULL; struct sk_buff *skb; struct ip_options *opt = cork->opt; int hh_len; int exthdrlen; int mtu; int copy; int err; int offset = 0; bool zc = false; unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = dst_rtable(cork->dst); bool paged, hold_tskey = false, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; paged = !!cork->gso_size; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; maxnonfragsize = ip_sk_ignore_df(sk) ? IP_MAX_MTU : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } /* * transhdrlen > 0 means that this is the first fragment and we wish * it won't be fragmented in the future. */ if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && (!(flags & MSG_MORE) || cork->gso_size) && (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) csummode = CHECKSUM_PARTIAL; if ((flags & MSG_ZEROCOPY) && length) { struct msghdr *msg = from; if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) return -EINVAL; /* Leave uarg NULL if can't zerocopy, callers should * be able to handle it. */ if ((rt->dst.dev->features & NETIF_F_SG) && csummode == CHECKSUM_PARTIAL) { paged = true; zc = true; uarg = msg->msg_ubuf; } } else if (sock_flag(sk, SOCK_ZEROCOPY)) { uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb), false); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; zc = true; } else { uarg_to_msgzc(uarg)->zerocopy = 0; skb_zcopy_set(skb, uarg, &extra_uref); } } } else if ((flags & MSG_SPLICE_PAGES) && length) { if (inet_test_bit(HDRINCL, sk)) return -EPERM; if (rt->dst.dev->features & NETIF_F_SG && getfrag == ip_generic_getfrag) /* We need an empty buffer to attach stuff to */ paged = true; else flags &= ~MSG_SPLICE_PAGES; } cork->length += length; if (cork->tx_flags & SKBTX_ANY_TSTAMP && READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { if (cork->flags & IPCORK_TS_OPT_ID) { tskey = cork->ts_opt_id; } else { tskey = atomic_inc_return(&sk->sk_tskey) - 1; hold_tskey = true; } } /* So, what's going on in the loop below? * * We use calculated fragment length to generate chained skb, * each of segments is IP fragment ready for sending to network after * adding appropriate IP header. */ if (!skb) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = mtu - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen, alloc_extra; unsigned int pagedlen; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; if (skb_prev) fraggap = skb_prev->len - maxfraglen; else fraggap = 0; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > mtu - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; pagedlen = 0; alloc_extra = hh_len + 15; alloc_extra += exthdrlen; /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be * the last. */ if (datalen == length + fraggap) alloc_extra += rt->dst.trailer_len; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else if (!paged && (fraglen + alloc_extra < SKB_MAX_ALLOC || !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = fragheaderlen + transhdrlen; pagedlen = datalen - transhdrlen; } alloclen += alloc_extra; if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; } if (!skb) goto error; /* * Fill in the control structures */ skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); /* * Find where to start putting bytes. */ data = skb_put(skb, fraglen + exthdrlen - pagedlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); data += fragheaderlen + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap - pagedlen; /* [!] NOTE: copy will be negative if pagedlen>0 * because then the equation reduces to -fraggap. */ if (copy > 0 && INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } else if (flags & MSG_SPLICE_PAGES) { copy = 0; } offset += copy; length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; /* only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; cork->tx_flags = 0; skb_shinfo(skb)->tskey = tskey; tskey = 0; skb_zcopy_set(skb, uarg, &extra_uref); if ((flags & MSG_CONFIRM) && !skb_prev) skb_set_dst_pending_confirm(skb, 1); /* * Put the packet on the pending queue. */ if (!skb->destructor) { skb->destructor = sock_wfree; skb->sk = sk; wmem_alloc_delta += skb->truesize; } __skb_queue_tail(queue, skb); continue; } if (copy > length) copy = length; if (!(rt->dst.dev->features&NETIF_F_SG) && skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else if (flags & MSG_SPLICE_PAGES) { struct msghdr *msg = from; err = -EIO; if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) goto error; skb_zcopy_downgrade_managed(skb); if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { err = -EMSGSIZE; if (i == MAX_SKB_FRAGS) goto error; __skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, 0); skb_shinfo(skb)->nr_frags = ++i; get_page(pfrag->page); } copy = min_t(int, copy, pfrag->size - pfrag->offset); if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, page_address(pfrag->page) + pfrag->offset, offset, copy, skb->len, skb) < 0) goto error_efault; pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb_len_add(skb, copy); wmem_alloc_delta += copy; } else { err = skb_zerocopy_iter_dgram(skb, from, copy); if (err < 0) goto error; } offset += copy; length -= copy; } if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); return 0; error_efault: err = -EFAULT; error: net_zcopy_put_abort(uarg, extra_uref); cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); if (hold_tskey) atomic_dec(&sk->sk_tskey); return err; } static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, struct ipcm_cookie *ipc, struct rtable **rtp) { struct ip_options_rcu *opt; struct rtable *rt; rt = *rtp; if (unlikely(!rt)) return -EFAULT; cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); if (!inetdev_valid_mtu(cork->fragsize)) return -ENETUNREACH; /* * setup for corking. */ opt = ipc->opt; if (opt) { if (!cork->opt) { cork->opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); if (unlikely(!cork->opt)) return -ENOBUFS; } memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); cork->flags |= IPCORK_OPT; cork->addr = ipc->addr; } cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; /* We stole this route, caller should not release it. */ *rtp = NULL; cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; cork->mark = ipc->sockc.mark; cork->priority = ipc->sockc.priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { cork->flags |= IPCORK_TS_OPT_ID; cork->ts_opt_id = ipc->sockc.ts_opt_id; } return 0; } /* * ip_append_data() can make one large IP datagram from many pieces of * data. Each piece will be held on the socket until * ip_push_pending_frames() is called. Each piece can be a page or * non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use * this interface potentially. * * LATER: length must be adjusted by pad at tail, when it is required. */ int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); int err; if (flags&MSG_PROBE) return 0; if (skb_queue_empty(&sk->sk_write_queue)) { err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); if (err) return err; } else { transhdrlen = 0; } return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, sk_page_frag(sk), getfrag, from, length, transhdrlen, flags); } static void ip_cork_release(struct inet_cork *cork) { cork->flags &= ~IPCORK_OPT; kfree(cork->opt); cork->opt = NULL; dst_release(cork->dst); cork->dst = NULL; } /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. */ struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; struct rtable *rt = dst_rtable(cork->dst); struct iphdr *iph; u8 pmtudisc, ttl; __be16 df = 0; skb = __skb_dequeue(queue); if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ pmtudisc = READ_ONCE(inet->pmtudisc); if (pmtudisc == IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); if (cork->flags & IPCORK_OPT) opt = cork->opt; if (cork->ttl != 0) ttl = cork->ttl; else if (rt->rt_type == RTN_MULTICAST) ttl = READ_ONCE(inet->mc_ttl); else ttl = ip_select_ttl(inet, &rt->dst); iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos); iph->frag_off = df; iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); ip_select_ident(net, skb, sk); if (opt) { iph->ihl += opt->optlen >> 2; ip_options_build(skb, opt, cork->addr, rt); } skb->priority = cork->priority; skb->mark = cork->mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); else skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid); /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount */ cork->dst = NULL; skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) { u8 icmp_type; /* For such sockets, transhdrlen is zero when do ip_append_data(), * so icmphdr does not in skb linear region and can not get icmp_type * by icmp_hdr(skb)->type. */ if (sk->sk_type == SOCK_RAW && !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH)) icmp_type = fl4->fl4_icmp_type; else icmp_type = icmp_hdr(skb)->type; icmp_out_count(net, icmp_type); } ip_cork_release(cork); out: return skb; } int ip_send_skb(struct net *net, struct sk_buff *skb) { int err; err = ip_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); } return err; } int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) { struct sk_buff *skb; skb = ip_finish_skb(sk, fl4); if (!skb) return 0; /* Netfilter gets whole the not fragmented skb. */ return ip_send_skb(sock_net(sk), skb); } /* * Throw away all pending data on the socket. */ static void __ip_flush_pending_frames(struct sock *sk, struct sk_buff_head *queue, struct inet_cork *cork) { struct sk_buff *skb; while ((skb = __skb_dequeue_tail(queue)) != NULL) kfree_skb(skb); ip_cork_release(cork); } void ip_flush_pending_frames(struct sock *sk) { __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, struct inet_cork *cork, unsigned int flags) { struct sk_buff_head queue; int err; if (flags & MSG_PROBE) return NULL; __skb_queue_head_init(&queue); cork->flags = 0; cork->addr = 0; cork->opt = NULL; err = ip_setup_cork(sk, cork, ipc, rtp); if (err) return ERR_PTR(err); err = __ip_append_data(sk, fl4, &queue, cork, &current->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { __ip_flush_pending_frames(sk, &queue, cork); return ERR_PTR(err); } return __ip_make_skb(sk, fl4, &queue, cork); } /* * Fetch data from kernel space and fill in checksum if needed. */ static int ip_reply_glue_bits(void *dptr, char *to, int offset, int len, int odd, struct sk_buff *skb) { __wsum csum; csum = csum_partial_copy_nocheck(dptr+offset, to, len); skb->csum = csum_block_add(skb->csum, csum, odd); return 0; } /* * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. */ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len, u64 transmit_time, u32 txhash) { struct ip_options_data replyopts; struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); struct net *net = sock_net(sk); struct sk_buff *nskb; int err; int oif; if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt)) return; ipcm_init(&ipc); ipc.addr = daddr; ipc.sockc.transmit_time = transmit_time; if (replyopts.opt.opt.optlen) { ipc.opt = &replyopts.opt; if (replyopts.opt.opt.srr) daddr = replyopts.opt.opt.faddr; } oif = arg->bound_dev_if; if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark, arg->tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest, arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) return; inet_sk(sk)->tos = arg->tos; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); ipc.sockc.mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { ip_flush_pending_frames(sk); goto out; } nskb = skb_peek(&sk->sk_write_queue); if (nskb) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(nskb) + arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; if (orig_sk) { skb_set_owner_edemux(nskb, (struct sock *)orig_sk); psp_reply_set_decrypted(nskb); } if (transmit_time) nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); } out: ip_rt_put(rt); } void __init ip_init(void) { ip_rt_init(); inet_initpeers(); #if defined(CONFIG_IP_MULTICAST) igmp_mc_init(); #endif }
1 5 5 1 4 1 1 2 2 2 1 1 1 1 1 2 2 2 2 2 13 2 7 6 3 4 3 2 1 1 1 3 1 5 3 1 1 3 1 5 1 6 4 1 2 2 1 3 3 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 3 1 1 2 1 1 1 2 4 3 1 2 1 2 3 1 1 1 2 11 5 6 11 2 1 1 1 2 6 1 1 3 1 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 25 1 4 1 4 6 3 2 3 1 1 8 1 1 1 5 1 2 1 4 4 4 1 3 1 1 2 3 3 3 1 1 2 3 2 2 1 1 3 3 2 8 1 5 3 8 3 1 1 1 3 3 2 2 3 1 1 1 5 1 1 1 1 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 // SPDX-License-Identifier: GPL-2.0 /* * cfg80211 - wext compat code * * This is temporary code until all wireless functionality is migrated * into cfg80211, when that happens all the exports here go away and * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2019-2023 Intel Corporation */ #include <linux/export.h> #include <linux/wireless.h> #include <linux/nl80211.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/slab.h> #include <net/iw_handler.h> #include <net/cfg80211.h> #include <net/cfg80211-wext.h> #include "wext-compat.h" #include "core.h" #include "rdev-ops.h" int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { strcpy(wrqu->name, "IEEE 802.11"); return 0; } int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { __u32 *mode = &wrqu->mode; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; rdev = wiphy_to_rdev(wdev->wiphy); switch (*mode) { case IW_MODE_INFRA: type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_MONITOR: type = NL80211_IFTYPE_MONITOR; break; default: return -EINVAL; } if (type == wdev->iftype) return 0; memset(&vifparams, 0, sizeof(vifparams)); guard(wiphy)(wdev->wiphy); return cfg80211_change_iface(rdev, dev, type, &vifparams); } int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { __u32 *mode = &wrqu->mode; struct wireless_dev *wdev = dev->ieee80211_ptr; if (!wdev) return -EOPNOTSUPP; switch (wdev->iftype) { case NL80211_IFTYPE_AP: *mode = IW_MODE_MASTER; break; case NL80211_IFTYPE_STATION: *mode = IW_MODE_INFRA; break; case NL80211_IFTYPE_ADHOC: *mode = IW_MODE_ADHOC; break; case NL80211_IFTYPE_MONITOR: *mode = IW_MODE_MONITOR; break; case NL80211_IFTYPE_WDS: *mode = IW_MODE_REPEAT; break; case NL80211_IFTYPE_AP_VLAN: *mode = IW_MODE_SECOND; /* FIXME */ break; default: *mode = IW_MODE_AUTO; break; } return 0; } int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_range *range = (struct iw_range *) extra; enum nl80211_band band; int i, c = 0; if (!wdev) return -EOPNOTSUPP; data->length = sizeof(struct iw_range); memset(range, 0, sizeof(struct iw_range)); range->we_version_compiled = WIRELESS_EXT; range->we_version_source = 21; range->retry_capa = IW_RETRY_LIMIT; range->retry_flags = IW_RETRY_LIMIT; range->min_retry = 0; range->max_retry = 255; range->min_rts = 0; range->max_rts = 2347; range->min_frag = 256; range->max_frag = 2346; range->max_encoding_tokens = 4; range->max_qual.updated = IW_QUAL_NOISE_INVALID; switch (wdev->wiphy->signal_type) { case CFG80211_SIGNAL_TYPE_NONE: break; case CFG80211_SIGNAL_TYPE_MBM: range->max_qual.level = (u8)-110; range->max_qual.qual = 70; range->avg_qual.qual = 35; range->max_qual.updated |= IW_QUAL_DBM; range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; break; case CFG80211_SIGNAL_TYPE_UNSPEC: range->max_qual.level = 100; range->max_qual.qual = 100; range->avg_qual.qual = 50; range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; break; } range->avg_qual.level = range->max_qual.level / 2; range->avg_qual.noise = range->max_qual.noise / 2; range->avg_qual.updated = range->max_qual.updated; for (i = 0; i < wdev->wiphy->n_cipher_suites; i++) { switch (wdev->wiphy->cipher_suites[i]) { case WLAN_CIPHER_SUITE_TKIP: range->enc_capa |= (IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_WPA); break; case WLAN_CIPHER_SUITE_CCMP: range->enc_capa |= (IW_ENC_CAPA_CIPHER_CCMP | IW_ENC_CAPA_WPA2); break; case WLAN_CIPHER_SUITE_WEP40: range->encoding_size[range->num_encoding_sizes++] = WLAN_KEY_LEN_WEP40; break; case WLAN_CIPHER_SUITE_WEP104: range->encoding_size[range->num_encoding_sizes++] = WLAN_KEY_LEN_WEP104; break; } } for (band = 0; band < NUM_NL80211_BANDS; band ++) { struct ieee80211_supported_band *sband; sband = wdev->wiphy->bands[band]; if (!sband) continue; for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { struct ieee80211_channel *chan = &sband->channels[i]; if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { range->freq[c].i = ieee80211_frequency_to_channel( chan->center_freq); range->freq[c].m = chan->center_freq; range->freq[c].e = 6; c++; } } } range->num_channels = c; range->num_frequency = c; IW_EVENT_CAPA_SET_KERNEL(range->event_capa); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); if (wdev->wiphy->max_scan_ssids > 0) range->scan_capa |= IW_SCAN_CAPA_ESSID; return 0; } /** * cfg80211_wext_freq - get wext frequency for non-"auto" * @freq: the wext freq encoding * * Returns: a frequency, or a negative error code, or 0 for auto. */ int cfg80211_wext_freq(struct iw_freq *freq) { /* * Parse frequency - return 0 for auto and * -EINVAL for impossible things. */ if (freq->e == 0) { enum nl80211_band band = NL80211_BAND_2GHZ; if (freq->m < 0) return 0; if (freq->m > 14) band = NL80211_BAND_5GHZ; return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div <= 0) return -EINVAL; return freq->m / div; } } int cfg80211_wext_siwrts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rts = &wrqu->rts; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 orts = wdev->wiphy->rts_threshold; int err; guard(wiphy)(&rdev->wiphy); if (rts->disabled || !rts->fixed) wdev->wiphy->rts_threshold = (u32) -1; else if (rts->value < 0) return -EINVAL; else wdev->wiphy->rts_threshold = rts->value; err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_RTS_THRESHOLD); if (err) wdev->wiphy->rts_threshold = orts; return err; } int cfg80211_wext_giwrts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rts = &wrqu->rts; struct wireless_dev *wdev = dev->ieee80211_ptr; rts->value = wdev->wiphy->rts_threshold; rts->disabled = rts->value == (u32) -1; rts->fixed = 1; return 0; } int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *frag = &wrqu->frag; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 ofrag = wdev->wiphy->frag_threshold; int err; guard(wiphy)(&rdev->wiphy); if (frag->disabled || !frag->fixed) { wdev->wiphy->frag_threshold = (u32) -1; } else if (frag->value < 256) { return -EINVAL; } else { /* Fragment length must be even, so strip LSB. */ wdev->wiphy->frag_threshold = frag->value & ~0x1; } err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; return err; } int cfg80211_wext_giwfrag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *frag = &wrqu->frag; struct wireless_dev *wdev = dev->ieee80211_ptr; frag->value = wdev->wiphy->frag_threshold; frag->disabled = frag->value == (u32) -1; frag->fixed = 1; return 0; } static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *retry = &wrqu->retry; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 changed = 0; u8 olong = wdev->wiphy->retry_long; u8 oshort = wdev->wiphy->retry_short; int err; if (retry->disabled || retry->value < 1 || retry->value > 255 || (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; guard(wiphy)(&rdev->wiphy); if (retry->flags & IW_RETRY_LONG) { wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; } else if (retry->flags & IW_RETRY_SHORT) { wdev->wiphy->retry_short = retry->value; changed |= WIPHY_PARAM_RETRY_SHORT; } else { wdev->wiphy->retry_short = retry->value; wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; changed |= WIPHY_PARAM_RETRY_SHORT; } err = rdev_set_wiphy_params(rdev, -1, changed); if (err) { wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; } return err; } int cfg80211_wext_giwretry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *retry = &wrqu->retry; struct wireless_dev *wdev = dev->ieee80211_ptr; retry->disabled = 0; if (retry->flags == 0 || (retry->flags & IW_RETRY_SHORT)) { /* * First return short value, iwconfig will ask long value * later if needed */ retry->flags |= IW_RETRY_LIMIT | IW_RETRY_SHORT; retry->value = wdev->wiphy->retry_short; if (wdev->wiphy->retry_long == wdev->wiphy->retry_short) retry->flags |= IW_RETRY_LONG; return 0; } if (retry->flags & IW_RETRY_LONG) { retry->flags = IW_RETRY_LIMIT | IW_RETRY_LONG; retry->value = wdev->wiphy->retry_long; } return 0; } static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, struct net_device *dev, bool pairwise, const u8 *addr, bool remove, bool tx_key, int idx, struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; bool rejoin = false; if (wdev->valid_links) return -EINVAL; if (pairwise && !addr) return -EINVAL; /* * In many cases we won't actually need this, but it's better * to do it first in case the allocation fails. Don't use wext. */ if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; for (i = 0; i < 4; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } if (wdev->iftype != NL80211_IFTYPE_ADHOC && wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { if (!wdev->connected) return -ENOLINK; if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; if (idx < 4 || idx > 5) return -EINVAL; } else if (idx < 0 || idx > 3) return -EINVAL; if (remove) { err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) { /* * If removing the current TX key, we will need to * join a new IBSS without the privacy bit clear. */ if (idx == wdev->wext.default_key && wdev->iftype == NL80211_IFTYPE_ADHOC) { cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } if (!pairwise && addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; else err = rdev_del_key(rdev, dev, -1, idx, pairwise, addr); } wdev->wext.connect.privacy = false; /* * Applications using wireless extensions expect to be * able to delete keys that don't exist, so allow that. */ if (err == -ENOENT) err = 0; if (!err) { if (!addr && idx < 4) { memset(wdev->wext.keys->data[idx], 0, sizeof(wdev->wext.keys->data[idx])); wdev->wext.keys->params[idx].key_len = 0; wdev->wext.keys->params[idx].cipher = 0; } if (idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } if (!err && rejoin) err = cfg80211_ibss_wext_join(rdev, wdev); return err; } if (addr) tx_key = false; if (cfg80211_validate_key_settings(rdev, params, idx, pairwise, addr)) return -EINVAL; err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; if (err) return err; /* * We only need to store WEP keys, since they're the only keys that * can be set before a connection is established and persist after * disconnecting. */ if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104)) { wdev->wext.keys->params[idx] = *params; memcpy(wdev->wext.keys->data[idx], params->key, params->key_len); wdev->wext.keys->params[idx].key = wdev->wext.keys->data[idx]; } if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104) && (tx_key || (!addr && wdev->wext.default_key == -1))) { if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) { /* * If we are getting a new TX key from not having * had one before we need to join a new IBSS with * the privacy bit set. */ if (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->wext.default_key == -1) { cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } err = rdev_set_default_key(rdev, dev, -1, idx, true, true); } if (!err) { wdev->wext.default_key = idx; if (rejoin) err = cfg80211_ibss_wext_join(rdev, wdev); } return err; } if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_set_default_mgmt_key(rdev, dev, -1, idx); if (!err) wdev->wext.default_mgmt_key = idx; return err; } return 0; } static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct key_params params; bool remove = false; int idx; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || !rdev->ops->set_default_key) return -EOPNOTSUPP; guard(wiphy)(&rdev->wiphy); if (wdev->valid_links) return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) { return -EINVAL; } else { idx--; } if (erq->flags & IW_ENCODE_DISABLED) remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ int err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_set_default_key(rdev, dev, -1, idx, true, true); if (!err) wdev->wext.default_key = idx; return err; } memset(&params, 0, sizeof(params)); params.key = keybuf; params.key_len = erq->length; if (erq->length == 5) params.cipher = WLAN_CIPHER_SUITE_WEP40; else if (erq->length == 13) params.cipher = WLAN_CIPHER_SUITE_WEP104; else if (!remove) return -EINVAL; return cfg80211_set_encryption(rdev, dev, false, NULL, remove, wdev->wext.default_key == -1, idx, &params); } static int cfg80211_wext_siwencodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; const u8 *addr; int idx; bool remove = false; struct key_params params; u32 cipher; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || !rdev->ops->set_default_key) return -EOPNOTSUPP; if (wdev->valid_links) return -EOPNOTSUPP; switch (ext->alg) { case IW_ENCODE_ALG_NONE: remove = true; cipher = 0; break; case IW_ENCODE_ALG_WEP: if (ext->key_len == 5) cipher = WLAN_CIPHER_SUITE_WEP40; else if (ext->key_len == 13) cipher = WLAN_CIPHER_SUITE_WEP104; else return -EINVAL; break; case IW_ENCODE_ALG_TKIP: cipher = WLAN_CIPHER_SUITE_TKIP; break; case IW_ENCODE_ALG_CCMP: cipher = WLAN_CIPHER_SUITE_CCMP; break; case IW_ENCODE_ALG_AES_CMAC: cipher = WLAN_CIPHER_SUITE_AES_CMAC; break; default: return -EOPNOTSUPP; } if (erq->flags & IW_ENCODE_DISABLED) remove = true; idx = erq->flags & IW_ENCODE_INDEX; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { if (idx < 4 || idx > 5) { idx = wdev->wext.default_mgmt_key; if (idx < 0) return -EINVAL; } else idx--; } else { if (idx < 1 || idx > 4) { idx = wdev->wext.default_key; if (idx < 0) return -EINVAL; } else idx--; } addr = ext->addr.sa_data; if (is_broadcast_ether_addr(addr)) addr = NULL; memset(&params, 0, sizeof(params)); params.key = ext->key; params.key_len = ext->key_len; params.cipher = cipher; if (ext->ext_flags & IW_ENCODE_EXT_RX_SEQ_VALID) { params.seq = ext->rx_seq; params.seq_len = 6; } guard(wiphy)(wdev->wiphy); return cfg80211_set_encryption(rdev, dev, !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), addr, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, idx, &params); } static int cfg80211_wext_giwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; int idx; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) return -EINVAL; else idx--; erq->flags = idx + 1; if (!wdev->wext.keys || !wdev->wext.keys->params[idx].cipher) { erq->flags |= IW_ENCODE_DISABLED; erq->length = 0; return 0; } erq->length = min_t(size_t, erq->length, wdev->wext.keys->params[idx].key_len); memcpy(keybuf, wdev->wext.keys->params[idx].key, erq->length); erq->flags |= IW_ENCODE_ENABLED; return 0; } static int cfg80211_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_freq *wextfreq = &wrqu->freq; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; int freq; guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) return -EINVAL; chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; return cfg80211_set_monitor_channel(rdev, dev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) return -EINVAL; chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: return -EOPNOTSUPP; } } static int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_freq *freq = &wrqu->freq; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = {}; int ret; guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: if (!rdev->ops->get_channel) return -EINVAL; ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) return ret; freq->m = chandef.chan->center_freq; freq->e = 6; return ret; default: return -EINVAL; } } static int cfg80211_wext_siwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; if (data->txpower.flags & IW_TXPOW_RANGE) return -EINVAL; if (!rdev->ops->set_tx_power) return -EOPNOTSUPP; /* only change when not disabling */ if (!data->txpower.disabled) { rfkill_set_sw_state(rdev->wiphy.rfkill, false); if (data->txpower.fixed) { /* * wext doesn't support negative values, see * below where it's for automatic */ if (data->txpower.value < 0) return -EINVAL; dbm = data->txpower.value; type = NL80211_TX_POWER_FIXED; /* TODO: do regulatory check! */ } else { /* * Automatic power level setting, max being the value * passed in from userland. */ if (data->txpower.value < 0) { type = NL80211_TX_POWER_AUTOMATIC; } else { dbm = data->txpower.value; type = NL80211_TX_POWER_LIMITED; } } } else { if (rfkill_set_sw_state(rdev->wiphy.rfkill, true)) schedule_work(&rdev->rfkill_block); return 0; } guard(wiphy)(&rdev->wiphy); return rdev_set_tx_power(rdev, wdev, -1, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err, val; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; if (data->txpower.flags & IW_TXPOW_RANGE) return -EINVAL; if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; scoped_guard(wiphy, &rdev->wiphy) { err = rdev_get_tx_power(rdev, wdev, -1, 0, &val); } if (err) return err; /* well... oh well */ data->txpower.fixed = 1; data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill); data->txpower.value = val; data->txpower.flags = IW_TXPOW_DBM; return 0; } static int cfg80211_set_auth_alg(struct wireless_dev *wdev, s32 auth_alg) { int nr_alg = 0; if (!auth_alg) return -EINVAL; if (auth_alg & ~(IW_AUTH_ALG_OPEN_SYSTEM | IW_AUTH_ALG_SHARED_KEY | IW_AUTH_ALG_LEAP)) return -EINVAL; if (auth_alg & IW_AUTH_ALG_OPEN_SYSTEM) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; } if (auth_alg & IW_AUTH_ALG_SHARED_KEY) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_SHARED_KEY; } if (auth_alg & IW_AUTH_ALG_LEAP) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_NETWORK_EAP; } if (nr_alg > 1) wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; return 0; } static int cfg80211_set_wpa_version(struct wireless_dev *wdev, u32 wpa_versions) { if (wpa_versions & ~(IW_AUTH_WPA_VERSION_WPA | IW_AUTH_WPA_VERSION_WPA2| IW_AUTH_WPA_VERSION_DISABLED)) return -EINVAL; if ((wpa_versions & IW_AUTH_WPA_VERSION_DISABLED) && (wpa_versions & (IW_AUTH_WPA_VERSION_WPA| IW_AUTH_WPA_VERSION_WPA2))) return -EINVAL; if (wpa_versions & IW_AUTH_WPA_VERSION_DISABLED) wdev->wext.connect.crypto.wpa_versions &= ~(NL80211_WPA_VERSION_1|NL80211_WPA_VERSION_2); if (wpa_versions & IW_AUTH_WPA_VERSION_WPA) wdev->wext.connect.crypto.wpa_versions |= NL80211_WPA_VERSION_1; if (wpa_versions & IW_AUTH_WPA_VERSION_WPA2) wdev->wext.connect.crypto.wpa_versions |= NL80211_WPA_VERSION_2; return 0; } static int cfg80211_set_cipher_group(struct wireless_dev *wdev, u32 cipher) { if (cipher & IW_AUTH_CIPHER_WEP40) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_WEP40; else if (cipher & IW_AUTH_CIPHER_WEP104) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_WEP104; else if (cipher & IW_AUTH_CIPHER_TKIP) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_TKIP; else if (cipher & IW_AUTH_CIPHER_CCMP) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_CCMP; else if (cipher & IW_AUTH_CIPHER_AES_CMAC) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_AES_CMAC; else if (cipher & IW_AUTH_CIPHER_NONE) wdev->wext.connect.crypto.cipher_group = 0; else return -EINVAL; return 0; } static int cfg80211_set_cipher_pairwise(struct wireless_dev *wdev, u32 cipher) { int nr_ciphers = 0; u32 *ciphers_pairwise = wdev->wext.connect.crypto.ciphers_pairwise; if (cipher & IW_AUTH_CIPHER_WEP40) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP40; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_WEP104) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP104; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_TKIP) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_TKIP; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_CCMP) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_CCMP; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_AES_CMAC) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_AES_CMAC; nr_ciphers++; } BUILD_BUG_ON(NL80211_MAX_NR_CIPHER_SUITES < 5); wdev->wext.connect.crypto.n_ciphers_pairwise = nr_ciphers; return 0; } static int cfg80211_set_key_mgt(struct wireless_dev *wdev, u32 key_mgt) { int nr_akm_suites = 0; if (key_mgt & ~(IW_AUTH_KEY_MGMT_802_1X | IW_AUTH_KEY_MGMT_PSK)) return -EINVAL; if (key_mgt & IW_AUTH_KEY_MGMT_802_1X) { wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = WLAN_AKM_SUITE_8021X; nr_akm_suites++; } if (key_mgt & IW_AUTH_KEY_MGMT_PSK) { wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = WLAN_AKM_SUITE_PSK; nr_akm_suites++; } wdev->wext.connect.crypto.n_akm_suites = nr_akm_suites; return 0; } static int cfg80211_wext_siwauth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *data = &wrqu->param; struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_PRIVACY_INVOKED: wdev->wext.connect.privacy = data->value; return 0; case IW_AUTH_WPA_VERSION: return cfg80211_set_wpa_version(wdev, data->value); case IW_AUTH_CIPHER_GROUP: return cfg80211_set_cipher_group(wdev, data->value); case IW_AUTH_KEY_MGMT: return cfg80211_set_key_mgt(wdev, data->value); case IW_AUTH_CIPHER_PAIRWISE: return cfg80211_set_cipher_pairwise(wdev, data->value); case IW_AUTH_80211_AUTH_ALG: return cfg80211_set_auth_alg(wdev, data->value); case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: case IW_AUTH_DROP_UNENCRYPTED: case IW_AUTH_MFP: return 0; default: return -EOPNOTSUPP; } } static int cfg80211_wext_giwauth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { /* XXX: what do we need? */ return -EOPNOTSUPP; } static int cfg80211_wext_siwpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *wrq = &wrqu->power; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); bool ps; int timeout = wdev->ps_timeout; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; if (!rdev->ops->set_power_mgmt) return -EOPNOTSUPP; if (wrq->disabled) { ps = false; } else { switch (wrq->flags & IW_POWER_MODE) { case IW_POWER_ON: /* If not specified */ case IW_POWER_MODE: /* If set all mask */ case IW_POWER_ALL_R: /* If explicitly state all */ ps = true; break; default: /* Otherwise we ignore */ return -EINVAL; } if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT)) return -EINVAL; if (wrq->flags & IW_POWER_TIMEOUT) timeout = wrq->value / 1000; } guard(wiphy)(&rdev->wiphy); err = rdev_set_power_mgmt(rdev, dev, ps, timeout); if (err) return err; wdev->ps = ps; wdev->ps_timeout = timeout; return 0; } static int cfg80211_wext_giwpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *wrq = &wrqu->power; struct wireless_dev *wdev = dev->ieee80211_ptr; wrq->disabled = !wdev->ps; return 0; } static int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rate = &wrqu->bitrate; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; bool match = false; int band, ridx; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; memset(&mask, 0, sizeof(mask)); fixed = 0; maxrate = (u32)-1; if (rate->value < 0) { /* nothing */ } else if (rate->fixed) { fixed = rate->value / 100000; } else { maxrate = rate->value / 100000; } for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wdev->wiphy->bands[band]; if (sband == NULL) continue; for (ridx = 0; ridx < sband->n_bitrates; ridx++) { struct ieee80211_rate *srate = &sband->bitrates[ridx]; if (fixed == srate->bitrate) { mask.control[band].legacy = 1 << ridx; match = true; break; } if (srate->bitrate <= maxrate) { mask.control[band].legacy |= 1 << ridx; match = true; } } } if (!match) return -EINVAL; guard(wiphy)(&rdev->wiphy); if (dev->ieee80211_ptr->valid_links) return -EOPNOTSUPP; return rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); } static int cfg80211_wext_giwrate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rate = &wrqu->bitrate; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct station_info sinfo = {}; u8 addr[ETH_ALEN]; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!rdev->ops->get_station) return -EOPNOTSUPP; err = 0; if (!wdev->valid_links && wdev->links[0].client.current_bss) memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else err = -EOPNOTSUPP; if (err) return err; scoped_guard(wiphy, &rdev->wiphy) { err = rdev_get_station(rdev, dev, addr, &sinfo); } if (err) return err; if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { err = -EOPNOTSUPP; goto free; } rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); free: cfg80211_sinfo_release_content(&sinfo); return err; } /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; int ret; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) return NULL; if (!rdev->ops->get_station) return NULL; /* Grab BSSID of current BSS, if any */ wiphy_lock(&rdev->wiphy); if (wdev->valid_links || !wdev->links[0].client.current_bss) { wiphy_unlock(&rdev->wiphy); return NULL; } memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); memset(&sinfo, 0, sizeof(sinfo)); ret = rdev_get_station(rdev, dev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); if (ret) return NULL; memset(&wstats, 0, sizeof(wstats)); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.updated |= IW_QUAL_DBM; wstats.qual.level = sig; if (sig < -110) sig = -110; else if (sig > -40) sig = -40; wstats.qual.qual = sig + 110; break; } fallthrough; case CFG80211_SIGNAL_TYPE_UNSPEC: if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; wstats.qual.qual = sinfo.signal; break; } fallthrough; default: wstats.qual.updated |= IW_QUAL_LEVEL_INVALID; wstats.qual.updated |= IW_QUAL_QUAL_INVALID; } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; cfg80211_sinfo_release_content(&sinfo); return &wstats; } static int cfg80211_wext_siwap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); default: return -EOPNOTSUPP; } } static int cfg80211_wext_giwap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); default: return -EOPNOTSUPP; } } static int cfg80211_wext_siwessid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *ssid) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); default: return -EOPNOTSUPP; } } static int cfg80211_wext_giwessid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *ssid) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); data->flags = 0; data->length = 0; guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); default: return -EOPNOTSUPP; } } static int cfg80211_wext_siwpmksa(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; cfg_pmksa.bssid = pmksa->bssid.sa_data; cfg_pmksa.pmkid = pmksa->pmkid; guard(wiphy)(&rdev->wiphy); switch (pmksa->cmd) { case IW_PMKSA_ADD: if (!rdev->ops->set_pmksa) return -EOPNOTSUPP; return rdev_set_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_REMOVE: if (!rdev->ops->del_pmksa) return -EOPNOTSUPP; return rdev_del_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_FLUSH: if (!rdev->ops->flush_pmksa) return -EOPNOTSUPP; return rdev_flush_pmksa(rdev, dev); default: return -EOPNOTSUPP; } } static const iw_handler cfg80211_handlers[] = { IW_HANDLER(SIOCGIWNAME, cfg80211_wext_giwname), IW_HANDLER(SIOCSIWFREQ, cfg80211_wext_siwfreq), IW_HANDLER(SIOCGIWFREQ, cfg80211_wext_giwfreq), IW_HANDLER(SIOCSIWMODE, cfg80211_wext_siwmode), IW_HANDLER(SIOCGIWMODE, cfg80211_wext_giwmode), IW_HANDLER(SIOCGIWRANGE, cfg80211_wext_giwrange), IW_HANDLER(SIOCSIWAP, cfg80211_wext_siwap), IW_HANDLER(SIOCGIWAP, cfg80211_wext_giwap), IW_HANDLER(SIOCSIWMLME, cfg80211_wext_siwmlme), IW_HANDLER(SIOCSIWSCAN, cfg80211_wext_siwscan), IW_HANDLER(SIOCGIWSCAN, cfg80211_wext_giwscan), IW_HANDLER(SIOCSIWESSID, cfg80211_wext_siwessid), IW_HANDLER(SIOCGIWESSID, cfg80211_wext_giwessid), IW_HANDLER(SIOCSIWRATE, cfg80211_wext_siwrate), IW_HANDLER(SIOCGIWRATE, cfg80211_wext_giwrate), IW_HANDLER(SIOCSIWRTS, cfg80211_wext_siwrts), IW_HANDLER(SIOCGIWRTS, cfg80211_wext_giwrts), IW_HANDLER(SIOCSIWFRAG, cfg80211_wext_siwfrag), IW_HANDLER(SIOCGIWFRAG, cfg80211_wext_giwfrag), IW_HANDLER(SIOCSIWTXPOW, cfg80211_wext_siwtxpower), IW_HANDLER(SIOCGIWTXPOW, cfg80211_wext_giwtxpower), IW_HANDLER(SIOCSIWRETRY, cfg80211_wext_siwretry), IW_HANDLER(SIOCGIWRETRY, cfg80211_wext_giwretry), IW_HANDLER(SIOCSIWENCODE, cfg80211_wext_siwencode), IW_HANDLER(SIOCGIWENCODE, cfg80211_wext_giwencode), IW_HANDLER(SIOCSIWPOWER, cfg80211_wext_siwpower), IW_HANDLER(SIOCGIWPOWER, cfg80211_wext_giwpower), IW_HANDLER(SIOCSIWGENIE, cfg80211_wext_siwgenie), IW_HANDLER(SIOCSIWAUTH, cfg80211_wext_siwauth), IW_HANDLER(SIOCGIWAUTH, cfg80211_wext_giwauth), IW_HANDLER(SIOCSIWENCODEEXT, cfg80211_wext_siwencodeext), IW_HANDLER(SIOCSIWPMKSA, cfg80211_wext_siwpmksa), }; const struct iw_handler_def cfg80211_wext_handler = { .num_standard = ARRAY_SIZE(cfg80211_handlers), .standard = cfg80211_handlers, .get_wireless_stats = cfg80211_wireless_stats, };
13 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 /* * Copyright (c) 2001 The Regents of the University of Michigan. * All rights reserved. * * Kendrick Smith <kmsmith@umich.edu> * Andy Adamson <kandros@umich.edu> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include <linux/file.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/swap.h> #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> #include <linux/jhash.h> #include <linux/string_helpers.h> #include <linux/fsnotify.h> #include <linux/rhashtable.h> #include <linux/nfs_ssc.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" #include "current_stateid.h" #include "netns.h" #include "pnfs.h" #include "filecache.h" #include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC #define all_ones {{ ~0, ~0}, ~0} static const stateid_t one_stateid = { .si_generation = ~0, .si_opaque = all_ones, }; static const stateid_t zero_stateid = { /* all fields zero */ }; static const stateid_t currentstateid = { .si_generation = 1, }; static const stateid_t close_stateid = { .si_generation = 0xffffffffU, }; static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t))) #define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); static void nfsd4_file_hash_remove(struct nfs4_file *fi); static void deleg_reaper(struct nfsd_net *nn); /* Locking: */ /* * Currently used for the del_recall_lru and file hash table. In an * effort to decrease the scope of the client_mutex, this spinlock may * eventually cover more: */ static DEFINE_SPINLOCK(state_lock); enum nfsd4_st_mutex_lock_subclass { OPEN_STATEID_MUTEX = 0, LOCK_STATEID_MUTEX = 1, }; /* * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for * the refcount on the open stateid to drop. */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); /* * A waitqueue where a writer to clients/#/ctl destroying a client can * wait for cl_rpc_users to drop to 0 and then for the client to be * unhashed. */ static DECLARE_WAIT_QUEUE_HEAD(expiry_wq); static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops; static struct workqueue_struct *laundry_wq; int nfsd4_create_laundry_wq(void) { int rc = 0; laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) rc = -ENOMEM; return rc; } void nfsd4_destroy_laundry_wq(void) { destroy_workqueue(laundry_wq); } static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_dead; } static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; ses->se_dead = true; return nfs_ok; } static bool is_client_expired(struct nfs4_client *clp) { return clp->cl_time == 0; } static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn, struct nfs4_client *clp) { if (clp->cl_state != NFSD4_ACTIVE) atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); } static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } /* must be called under the client_lock */ static inline void renew_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (is_client_expired(clp)) { WARN_ON(1); printk("%s: client (clientid %08x/%08x) already expired\n", __func__, clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); return; } list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; } static void put_client_renew_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (!atomic_dec_and_test(&clp->cl_rpc_users)) return; if (!is_client_expired(clp)) renew_client_locked(clp); else wake_up_all(&expiry_wq); } static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock)) return; if (!is_client_expired(clp)) renew_client_locked(clp); else wake_up_all(&expiry_wq); spin_unlock(&nn->client_lock); } static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { __be32 status; if (is_session_dead(ses)) return nfserr_badsession; status = get_client_locked(ses->se_client); if (status) return status; atomic_inc(&ses->se_ref); return nfs_ok; } static void nfsd4_put_session_locked(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) free_session(ses); put_client_renew_locked(clp); } static void nfsd4_put_session(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); spin_lock(&nn->client_lock); nfsd4_put_session_locked(ses); spin_unlock(&nn->client_lock); } static struct nfsd4_blocked_lock * find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, struct nfsd_net *nn) { struct nfsd4_blocked_lock *cur, *found = NULL; spin_lock(&nn->blocked_locks_lock); list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; } } spin_unlock(&nn->blocked_locks_lock); if (found) locks_delete_block(&found->nbl_lock); return found; } static struct nfsd4_blocked_lock * find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, struct nfsd_net *nn) { struct nfsd4_blocked_lock *nbl; nbl = find_blocked_lock(lo, fh, nn); if (!nbl) { nbl = kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { INIT_LIST_HEAD(&nbl->nbl_list); INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); } } return nbl; } static void free_nbl(struct kref *kref) { struct nfsd4_blocked_lock *nbl; nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); locks_release_private(&nbl->nbl_lock); kfree(nbl); } static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); kref_put(&nbl->nbl_kref, free_nbl); } static void remove_blocked_locks(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_blocked_lock *nbl; LIST_HEAD(reaplist); /* Dequeue all blocked locks */ spin_lock(&nn->blocked_locks_lock); while (!list_empty(&lo->lo_blocked)) { nbl = list_first_entry(&lo->lo_blocked, struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); /* Now free them */ while (!list_empty(&reaplist)) { nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } } static void nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) { struct nfsd4_blocked_lock *nbl = container_of(cb, struct nfsd4_blocked_lock, nbl_cb); locks_delete_block(&nbl->nbl_lock); } static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { trace_nfsd_cb_notify_lock_done(&zero_stateid, task); /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and * just quit trying on anything else. */ switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 1 * HZ); return 0; default: return 1; } } static void nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) { struct nfsd4_blocked_lock *nbl = container_of(cb, struct nfsd4_blocked_lock, nbl_cb); free_blocked_lock(nbl); } static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, .opcode = OP_CB_NOTIFY_LOCK, }; /* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what * combinations of share bits previous opens have used. This allows us * to enforce the recommendation in * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that * the server return an error if the client attempt to downgrade to a * combination of share bits not explicable by closing some of its * previous opens. * * This enforcement is arguably incomplete, since we don't keep * track of access/deny bit combinations; so, e.g., we allow: * * OPEN allow read, deny write * OPEN allow both, deny none * DOWNGRADE allow read, deny none * * which we should reject. * * But you could also argue that our current code is already overkill, * since it only exists to return NFS4ERR_INVAL on incorrect client * behavior. */ static unsigned int bmap_to_share_mode(unsigned long bmap) { int i; unsigned int access = 0; for (i = 1; i < 4; i++) { if (test_bit(i, &bmap)) access |= i; } return access; } /* set share access for a given stateid */ static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); stp->st_access_bmap |= mask; } /* clear share access for a given stateid */ static inline void clear_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); stp->st_access_bmap &= ~mask; } /* test whether a given stateid has access */ static inline bool test_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; return (bool)(stp->st_access_bmap & mask); } /* set share deny for a given stateid */ static inline void set_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); stp->st_deny_bmap |= mask; } /* clear share deny for a given stateid */ static inline void clear_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); stp->st_deny_bmap &= ~mask; } /* test whether a given stateid is denying specific access */ static inline bool test_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; return (bool)(stp->st_deny_bmap & mask); } static int nfs4_access_to_omode(u32 access) { switch (access & NFS4_SHARE_ACCESS_BOTH) { case NFS4_SHARE_ACCESS_READ: return O_RDONLY; case NFS4_SHARE_ACCESS_WRITE: return O_WRONLY; case NFS4_SHARE_ACCESS_BOTH: return O_RDWR; } WARN_ON_ONCE(1); return O_RDONLY; } static inline int access_permit_read(struct nfs4_ol_stateid *stp) { return test_access(NFS4_SHARE_ACCESS_READ, stp) || test_access(NFS4_SHARE_ACCESS_BOTH, stp) || test_access(NFS4_SHARE_ACCESS_WRITE, stp); } static inline int access_permit_write(struct nfs4_ol_stateid *stp) { return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || test_access(NFS4_SHARE_ACCESS_BOTH, stp); } static inline struct nfs4_stateowner * nfs4_get_stateowner(struct nfs4_stateowner *sop) { atomic_inc(&sop->so_count); return sop; } static int same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) { return (sop->so_owner.len == owner->len) && 0 == memcmp(sop->so_owner.data, owner->data, owner->len); } static struct nfs4_openowner * find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_client *clp) { struct nfs4_stateowner *so; lockdep_assert_held(&clp->cl_lock); list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval], so_strhash) { if (!so->so_is_open_owner) continue; if (same_owner_str(so, &open->op_owner)) return openowner(nfs4_get_stateowner(so)); } return NULL; } static inline u32 opaque_hashval(const void *ptr, int nbytes) { unsigned char *cptr = (unsigned char *) ptr; u32 x = 0; while (nbytes--) { x *= 37; x += *cptr++; } return x; } void put_nfs4_file(struct nfs4_file *fi) { if (refcount_dec_and_test(&fi->fi_ref)) { nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); kfree_rcu(fi, fi_rcu); } } static struct nfsd_file * find_writeable_file_locked(struct nfs4_file *f) { struct nfsd_file *ret; lockdep_assert_held(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } static struct nfsd_file * find_writeable_file(struct nfs4_file *f) { struct nfsd_file *ret; spin_lock(&f->fi_lock); ret = find_writeable_file_locked(f); spin_unlock(&f->fi_lock); return ret; } static struct nfsd_file * find_readable_file_locked(struct nfs4_file *f) { struct nfsd_file *ret; lockdep_assert_held(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_RDONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } static struct nfsd_file * find_readable_file(struct nfs4_file *f) { struct nfsd_file *ret; spin_lock(&f->fi_lock); ret = find_readable_file_locked(f); spin_unlock(&f->fi_lock); return ret; } struct nfsd_file * find_any_file(struct nfs4_file *f) { struct nfsd_file *ret; if (!f) return NULL; spin_lock(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_RDWR]); if (!ret) { ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDONLY]); } spin_unlock(&f->fi_lock); return ret; } static struct nfsd_file *find_any_file_locked(struct nfs4_file *f) { lockdep_assert_held(&f->fi_lock); if (f->fi_fds[O_RDWR]) return f->fi_fds[O_RDWR]; if (f->fi_fds[O_WRONLY]) return f->fi_fds[O_WRONLY]; if (f->fi_fds[O_RDONLY]) return f->fi_fds[O_RDONLY]; return NULL; } static atomic_long_t num_delegations; unsigned long max_delegations; /* * Open owner state (share locks) */ /* hash tables for lock and open owners */ #define OWNER_HASH_BITS 8 #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); return ret & OWNER_HASH_MASK; } static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; static const struct rhashtable_params nfs4_file_rhash_params = { .key_len = sizeof_field(struct nfs4_file, fi_inode), .key_offset = offsetof(struct nfs4_file, fi_inode), .head_offset = offsetof(struct nfs4_file, fi_rlist), /* * Start with a single page hash table to reduce resizing churn * on light workloads. */ .min_size = 256, .automatic_shrinking = true, }; /* * Check if courtesy clients have conflicting access and resolve it if possible * * access: is op_share_access if share_access is true. * Check if access mode, op_share_access, would conflict with * the current deny mode of the file 'fp'. * access: is op_share_deny if share_access is false. * Check if the deny mode, op_share_deny, would conflict with * current access of the file 'fp'. * stp: skip checking this entry. * new_stp: normal open, not open upgrade. * * Function returns: * false - access/deny mode conflict with normal client. * true - no conflict or conflict with courtesy client(s) is resolved. */ static bool nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp, struct nfs4_ol_stateid *stp, u32 access, bool share_access) { struct nfs4_ol_stateid *st; bool resolvable = true; unsigned char bmap; struct nfsd_net *nn; struct nfs4_client *clp; lockdep_assert_held(&fp->fi_lock); list_for_each_entry(st, &fp->fi_stateids, st_perfile) { /* ignore lock stateid */ if (st->st_openstp) continue; if (st == stp && new_stp) continue; /* check file access against deny mode or vice versa */ bmap = share_access ? st->st_deny_bmap : st->st_access_bmap; if (!(access & bmap_to_share_mode(bmap))) continue; clp = st->st_stid.sc_client; if (try_to_expire_client(clp)) continue; resolvable = false; break; } if (resolvable) { clp = stp->st_stid.sc_client; nn = net_generic(clp->net, nfsd_net_id); mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); } return resolvable; } static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { lockdep_assert_held(&fp->fi_lock); if (access & NFS4_SHARE_ACCESS_WRITE) atomic_inc(&fp->fi_access[O_WRONLY]); if (access & NFS4_SHARE_ACCESS_READ) atomic_inc(&fp->fi_access[O_RDONLY]); } static __be32 nfs4_file_get_access(struct nfs4_file *fp, u32 access) { lockdep_assert_held(&fp->fi_lock); /* Does this access mode make sense? */ if (access & ~NFS4_SHARE_ACCESS_BOTH) return nfserr_inval; /* Does it conflict with a deny mode already set? */ if ((access & fp->fi_share_deny) != 0) return nfserr_share_denied; __nfs4_file_get_access(fp, access); return nfs_ok; } static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny) { /* Common case is that there is no deny mode. */ if (deny) { /* Does this deny mode make sense? */ if (deny & ~NFS4_SHARE_DENY_BOTH) return nfserr_inval; if ((deny & NFS4_SHARE_DENY_READ) && atomic_read(&fp->fi_access[O_RDONLY])) return nfserr_share_denied; if ((deny & NFS4_SHARE_DENY_WRITE) && atomic_read(&fp->fi_access[O_WRONLY])) return nfserr_share_denied; } return nfs_ok; } static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { might_lock(&fp->fi_lock); if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { struct nfsd_file *f1 = NULL; struct nfsd_file *f2 = NULL; swap(f1, fp->fi_fds[oflag]); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) nfsd_file_put(f1); if (f2) nfsd_file_put(f2); } } static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) { WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH); if (access & NFS4_SHARE_ACCESS_WRITE) __nfs4_file_put_access(fp, O_WRONLY); if (access & NFS4_SHARE_ACCESS_READ) __nfs4_file_put_access(fp, O_RDONLY); } /* * Allocate a new open/delegation state counter. This is needed for * pNFS for proper return on close semantics. * * Note that we only allocate it for pNFS-enabled exports, otherwise * all pointers to struct nfs4_clnt_odstate are always NULL. */ static struct nfs4_clnt_odstate * alloc_clnt_odstate(struct nfs4_client *clp) { struct nfs4_clnt_odstate *co; co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); if (co) { co->co_client = clp; refcount_set(&co->co_odcount, 1); } return co; } static void hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co) { struct nfs4_file *fp = co->co_file; lockdep_assert_held(&fp->fi_lock); list_add(&co->co_perfile, &fp->fi_clnt_odstate); } static inline void get_clnt_odstate(struct nfs4_clnt_odstate *co) { if (co) refcount_inc(&co->co_odcount); } static void put_clnt_odstate(struct nfs4_clnt_odstate *co) { struct nfs4_file *fp; if (!co) return; fp = co->co_file; if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { list_del(&co->co_perfile); spin_unlock(&fp->fi_lock); nfsd4_return_all_file_layouts(co->co_client, fp); kmem_cache_free(odstate_slab, co); } } static struct nfs4_clnt_odstate * find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) { struct nfs4_clnt_odstate *co; struct nfs4_client *cl; if (!new) return NULL; cl = new->co_client; spin_lock(&fp->fi_lock); list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { if (co->co_client == cl) { get_clnt_odstate(co); goto out; } } co = new; co->co_file = fp; hash_clnt_odstate_locked(new); out: spin_unlock(&fp->fi_lock); return co; } struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; stid = kmem_cache_zalloc(slab, GFP_KERNEL); if (!stid) return NULL; idr_preload(GFP_KERNEL); spin_lock(&cl->cl_lock); /* Reserving 0 for start of file in nfsdfs "states" file: */ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT); spin_unlock(&cl->cl_lock); idr_preload_end(); if (new_id < 0) goto out_free; stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); return stid; out_free: kmem_cache_free(slab, stid); return NULL; } /* * Create a unique stateid_t to represent each COPY. */ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, unsigned char cs_type) { int new_id; stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); stid->cs_stid.si_opaque.so_id = new_id; stid->cs_stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; stid->cs_type = cs_type; return 1; } int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) { return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID); } struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, struct nfs4_stid *p_stid) { struct nfs4_cpntf_state *cps; cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); refcount_set(&cps->cp_stateid.cs_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); list_add(&cps->cp_list, &p_stid->sc_cp_list); spin_unlock(&nn->s2s_cp_lock); return cps; out_free: kfree(cps); return NULL; } void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; if (copy->cp_stateid.cs_type != NFS4_COPY_STID) return; nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.cs_stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) { struct nfs4_cpntf_state *cps; struct nfsd_net *nn; nn = net_generic(net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); while (!list_empty(&stid->sc_cp_list)) { cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cpntf_state, cp_list); _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); } static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; return openlockstateid(stid); } /* * As the sc_free callback of deleg, this may be called by nfs4_put_stid * in nfsd_break_one_deleg. * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, * this function mustn't ever sleep. */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } /* * When we recall a delegation, we should be careful not to hand it * out again straight away. * To ensure this we keep a pair of bloom filters ('new' and 'old') * in which the filehandles of recalled delegations are "stored". * If a filehandle appear in either filter, a delegation is blocked. * When a delegation is recalled, the filehandle is stored in the "new" * filter. * Every 30 seconds we swap the filters and clear the "new" one, * unless both are empty of course. This results in delegations for a * given filehandle being blocked for between 30 and 60 seconds. * * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. Testing does not need the lock * except when swapping the two filters. */ static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; time64_t swap_time; int new; /* index into 'set' */ DECLARE_BITMAP(set[2], 256); } blocked_delegations; static int delegation_blocked(struct knfsd_fh *fh) { u32 hash; struct bloom_pair *bd = &blocked_delegations; if (bd->entries == 0) return 0; if (ktime_get_seconds() - bd->swap_time > 30) { spin_lock(&blocked_delegations_lock); if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; bd->new = 1-bd->new; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); } hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) return 1; if (test_bit(hash&255, bd->set[1]) && test_bit((hash>>8)&255, bd->set[1]) && test_bit((hash>>16)&255, bd->set[1])) return 1; return 0; } static void block_delegations(struct knfsd_fh *fh) { u32 hash; struct bloom_pair *bd = &blocked_delegations; hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) bd->swap_time = ktime_get_seconds(); bd->entries += 1; spin_unlock(&blocked_delegations_lock); } static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate, u32 dl_type) { struct nfs4_delegation *dp; struct nfs4_stid *stid; long n; dprintk("NFSD alloc_init_deleg\n"); n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; stid = nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg); if (stid == NULL) goto out_dec; dp = delegstateid(stid); /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid * 0 anyway just for consistency and use 1: */ dp->dl_stid.sc_stateid.si_generation = 1; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_clnt_odstate = odstate; get_clnt_odstate(odstate); dp->dl_type = dl_type; dp->dl_retries = 1; dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); dp->dl_cb_fattr.ncf_file_modified = false; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; out_dec: atomic_long_dec(&num_delegations); return NULL; } void nfs4_put_stid(struct nfs4_stid *s) { struct nfs4_file *fp = s->sc_file; struct nfs4_client *clp = s->sc_client; might_lock(&clp->cl_lock); if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { wake_up_all(&close_wq); return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); if (s->sc_status & SC_STATUS_ADMIN_REVOKED) atomic_dec(&s->sc_client->cl_admin_revoked); nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); if (fp) put_nfs4_file(fp); } void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) { stateid_t *src = &stid->sc_stateid; spin_lock(&stid->sc_lock); if (unlikely(++src->si_generation == 0)) src->si_generation = 1; memcpy(dst, src, sizeof(*dst)); spin_unlock(&stid->sc_lock); } static void put_deleg_file(struct nfs4_file *fp) { struct nfsd_file *rnf = NULL; struct nfsd_file *nf = NULL; spin_lock(&fp->fi_lock); if (--fp->fi_delegees == 0) { swap(nf, fp->fi_deleg_file); swap(rnf, fp->fi_rdeleg_file); } spin_unlock(&fp->fi_lock); if (nf) nfsd_file_put(nf); if (rnf) nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ); } static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f) { struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME }; struct inode *inode = file_inode(f); int ret; /* don't do anything if FMODE_NOCMTIME isn't set */ if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0) return; spin_lock(&f->f_lock); f->f_mode &= ~FMODE_NOCMTIME; spin_unlock(&f->f_lock); /* was it never written? */ if (!dp->dl_written) return; /* did it get a setattr for the timestamps at some point? */ if (dp->dl_setattr) return; /* Stamp everything to "now" */ inode_lock(inode); ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL); inode_unlock(inode); if (ret) { struct inode *inode = file_inode(f); pr_notice_ratelimited("Unable to update timestamps on inode %02x:%02x:%lu: %d\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino, ret); } } static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; struct nfsd_file *nf = fp->fi_deleg_file; WARN_ON_ONCE(!fp->fi_delegees); nfsd4_finalize_deleg_timestamps(dp, nf->nf_file); kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp); put_deleg_file(fp); } static void destroy_unhashed_deleg(struct nfs4_delegation *dp) { put_clnt_odstate(dp->dl_clnt_odstate); nfs4_unlock_deleg_lease(dp); nfs4_put_stid(&dp->dl_stid); } /** * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: * On success: true iff an existing delegation is found */ static bool nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp) { struct nfs4_delegation *searchdp = NULL; struct nfs4_client *searchclp = NULL; lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { searchclp = searchdp->dl_stid.sc_client; if (clp == searchclp) { return true; } } return false; } /** * hash_delegation_locked - Add a delegation to the appropriate lists * @dp: a pointer to the nfs4_delegation we are adding. * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: * On success: NULL if the delegation was successfully hashed. * * On error: -EAGAIN if one was previously granted to this * nfs4_client for this nfs4_file. Delegation is not hashed. * */ static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { struct nfs4_client *clp = dp->dl_stid.sc_client; lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); lockdep_assert_held(&clp->cl_lock); if (nfs4_delegation_exists(clp, fp)) return -EAGAIN; refcount_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = SC_TYPE_DELEG; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &clp->cl_delegations); return 0; } static bool delegation_hashed(struct nfs4_delegation *dp) { return !(list_empty(&dp->dl_perfile)); } static bool unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask) { struct nfs4_file *fp = dp->dl_stid.sc_file; lockdep_assert_held(&state_lock); if (!delegation_hashed(dp)) return false; if (statusmask == SC_STATUS_REVOKED && dp->dl_stid.sc_client->cl_minorversion == 0) statusmask = SC_STATUS_CLOSED; dp->dl_stid.sc_status |= statusmask; if (statusmask & SC_STATUS_ADMIN_REVOKED) atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked); /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; spin_lock(&fp->fi_lock); list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); return true; } static void destroy_delegation(struct nfs4_delegation *dp) { bool unhashed; spin_lock(&state_lock); unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED); spin_unlock(&state_lock); if (unhashed) destroy_unhashed_deleg(dp); } /** * revoke_delegation - perform nfs4 delegation structure cleanup * @dp: pointer to the delegation * * This function assumes that it's called either from the administrative * interface (nfsd4_revoke_states()) that's revoking a specific delegation * stateid or it's called from a laundromat thread (nfsd4_landromat()) that * determined that this specific state has expired and needs to be revoked * (both mark state with the appropriate stid sc_status mode). It is also * assumed that a reference was taken on the @dp state. * * If this function finds that the @dp state is SC_STATUS_FREED it means * that a FREE_STATEID operation for this stateid has been processed and * we can proceed to removing it from recalled list. However, if @dp state * isn't marked SC_STATUS_FREED, it means we need place it on the cl_revoked * list and wait for the FREE_STATEID to arrive from the client. At the same * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the * nfsd4_free_stateid() function that this stateid has already been added * to the cl_revoked list and that nfsd4_free_stateid() is now responsible * for removing it from the list. Inspection of where the delegation state * in the revocation process is protected by the clp->cl_lock. */ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && !(dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); spin_lock(&clp->cl_lock); if (dp->dl_stid.sc_status & SC_STATUS_FREED) { list_del_init(&dp->dl_recall_lru); goto out; } list_add(&dp->dl_recall_lru, &clp->cl_revoked); dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; out: spin_unlock(&clp->cl_lock); destroy_unhashed_deleg(dp); } /* * SETCLIENTID state */ static unsigned int clientid_hashval(u32 id) { return id & CLIENT_HASH_MASK; } static unsigned int clientstr_hashval(struct xdr_netobj name) { return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK; } /* * A stateid that had a deny mode associated with it is being released * or downgraded. Recalculate the deny mode on the file. */ static void recalculate_deny_mode(struct nfs4_file *fp) { struct nfs4_ol_stateid *stp; u32 old_deny; spin_lock(&fp->fi_lock); old_deny = fp->fi_share_deny; fp->fi_share_deny = 0; list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); if (fp->fi_share_deny == old_deny) break; } spin_unlock(&fp->fi_lock); } static void reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) { int i; bool change = false; for (i = 1; i < 4; i++) { if ((i & deny) != i) { change = true; clear_deny(i, stp); } } /* Recalculate per-file deny mode if there was a change */ if (change) recalculate_deny_mode(stp->st_stid.sc_file); } /* release all access and file references for a given stateid */ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; struct nfs4_file *fp = stp->st_stid.sc_file; if (fp && stp->st_deny_bmap != 0) recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) nfs4_file_put_access(stp->st_stid.sc_file, i); clear_access(i, stp); } } static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop) { kfree(sop->so_owner.data); sop->so_ops->so_free(sop); } static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { struct nfs4_client *clp = sop->so_client; might_lock(&clp->cl_lock); if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock)) return; sop->so_ops->so_unhash(sop); spin_unlock(&clp->cl_lock); nfs4_free_stateowner(sop); } static bool nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp) { return list_empty(&stp->st_perfile); } static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); if (list_empty(&stp->st_perfile)) return false; spin_lock(&fp->fi_lock); list_del_init(&stp->st_perfile); spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); return true; } static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); put_clnt_odstate(stp->st_clnt_odstate); release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(stateid_slab, stid); } static void nfs4_free_lock_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); struct nfsd_file *nf; nf = find_any_file(stp->st_stid.sc_file); if (nf) { get_file(nf->nf_file); filp_close(nf->nf_file, (fl_owner_t)lo); nfsd_file_put(nf); } nfs4_free_ol_stateid(stid); } /* * Put the persistent reference to an already unhashed generic stateid, while * holding the cl_lock. If it's the last reference, then put it onto the * reaplist for later destruction. */ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { struct nfs4_stid *s = &stp->st_stid; struct nfs4_client *clp = s->sc_client; lockdep_assert_held(&clp->cl_lock); WARN_ON_ONCE(!list_empty(&stp->st_locks)); if (!refcount_dec_and_test(&s->sc_count)) { wake_up_all(&close_wq); return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); if (s->sc_status & SC_STATUS_ADMIN_REVOKED) atomic_dec(&s->sc_client->cl_admin_revoked); list_add(&stp->st_locks, reaplist); } static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); if (!unhash_ol_stateid(stp)) return false; list_del_init(&stp->st_locks); stp->st_stid.sc_status |= SC_STATUS_CLOSED; return true; } static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_client *clp = stp->st_stid.sc_client; bool unhashed; spin_lock(&clp->cl_lock); unhashed = unhash_lock_stateid(stp); spin_unlock(&clp->cl_lock); if (unhashed) nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; lockdep_assert_held(&clp->cl_lock); list_del_init(&lo->lo_owner.so_strhash); } /* * Free a list of generic stateids that were collected earlier after being * fully unhashed. */ static void free_ol_stateid_reaplist(struct list_head *reaplist) { struct nfs4_ol_stateid *stp; struct nfs4_file *fp; might_sleep(); while (!list_empty(reaplist)) { stp = list_first_entry(reaplist, struct nfs4_ol_stateid, st_locks); list_del(&stp->st_locks); fp = stp->st_stid.sc_file; stp->st_stid.sc_free(&stp->st_stid); if (fp) put_nfs4_file(fp); } } static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, struct list_head *reaplist) { struct nfs4_ol_stateid *stp; lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); unhash_lock_stateid(stp); put_ol_stateid_locked(stp, reaplist); } } static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); if (!unhash_ol_stateid(stp)) return false; release_open_stateid_locks(stp, reaplist); return true; } static void release_open_stateid(struct nfs4_ol_stateid *stp) { LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); stp->st_stid.sc_status |= SC_STATUS_CLOSED; if (unhash_open_stateid(stp, &reaplist)) put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); } static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo) { lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); return list_empty(&oo->oo_owner.so_strhash) && list_empty(&oo->oo_perclient); } static void unhash_openowner_locked(struct nfs4_openowner *oo) { struct nfs4_client *clp = oo->oo_owner.so_client; lockdep_assert_held(&clp->cl_lock); list_del_init(&oo->oo_owner.so_strhash); list_del_init(&oo->oo_perclient); } static void release_last_closed_stateid(struct nfs4_openowner *oo) { struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, nfsd_net_id); struct nfs4_ol_stateid *s; spin_lock(&nn->client_lock); s = oo->oo_last_closed_stid; if (s) { list_del_init(&oo->oo_close_lru); oo->oo_last_closed_stid = NULL; } spin_unlock(&nn->client_lock); if (s) nfs4_put_stid(&s->st_stid); } static void release_openowner(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; struct nfs4_client *clp = oo->oo_owner.so_client; LIST_HEAD(reaplist); spin_lock(&clp->cl_lock); unhash_openowner_locked(oo); while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); if (unhash_open_stateid(stp, &reaplist)) put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); } static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp, struct super_block *sb, unsigned int sc_types) { unsigned long id, tmp; struct nfs4_stid *stid; spin_lock(&clp->cl_lock); idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) if ((stid->sc_type & sc_types) && stid->sc_status == 0 && stid->sc_file->fi_inode->i_sb == sb) { refcount_inc(&stid->sc_count); break; } spin_unlock(&clp->cl_lock); return stid; } /** * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem * @net: used to identify instance of nfsd (there is one per net namespace) * @sb: super_block used to identify target filesystem * * All nfs4 states (open, lock, delegation, layout) held by the server instance * and associated with a file on the given filesystem will be revoked resulting * in any files being closed and so all references from nfsd to the filesystem * being released. Thus nfsd will no longer prevent the filesystem from being * unmounted. * * The clients which own the states will subsequently being notified that the * states have been "admin-revoked". */ void nfsd4_revoke_states(struct net *net, struct super_block *sb) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); unsigned int idhashval; unsigned int sc_types; sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT; spin_lock(&nn->client_lock); for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) { struct list_head *head = &nn->conf_id_hashtbl[idhashval]; struct nfs4_client *clp; retry: list_for_each_entry(clp, head, cl_idhash) { struct nfs4_stid *stid = find_one_sb_stid(clp, sb, sc_types); if (stid) { struct nfs4_ol_stateid *stp; struct nfs4_delegation *dp; struct nfs4_layout_stateid *ls; spin_unlock(&nn->client_lock); switch (stid->sc_type) { case SC_TYPE_OPEN: stp = openlockstateid(stid); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); spin_lock(&clp->cl_lock); if (stid->sc_status == 0) { stid->sc_status |= SC_STATUS_ADMIN_REVOKED; atomic_inc(&clp->cl_admin_revoked); spin_unlock(&clp->cl_lock); release_all_access(stp); } else spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); break; case SC_TYPE_LOCK: stp = openlockstateid(stid); mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); spin_lock(&clp->cl_lock); if (stid->sc_status == 0) { struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); struct nfsd_file *nf; stid->sc_status |= SC_STATUS_ADMIN_REVOKED; atomic_inc(&clp->cl_admin_revoked); spin_unlock(&clp->cl_lock); nf = find_any_file(stp->st_stid.sc_file); if (nf) { get_file(nf->nf_file); filp_close(nf->nf_file, (fl_owner_t)lo); nfsd_file_put(nf); } release_all_access(stp); } else spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); break; case SC_TYPE_DELEG: refcount_inc(&stid->sc_count); dp = delegstateid(stid); spin_lock(&state_lock); if (!unhash_delegation_locked( dp, SC_STATUS_ADMIN_REVOKED)) dp = NULL; spin_unlock(&state_lock); if (dp) revoke_delegation(dp); break; case SC_TYPE_LAYOUT: ls = layoutstateid(stid); nfsd4_close_layout(ls); break; } nfs4_put_stid(stid); spin_lock(&nn->client_lock); if (clp->cl_minorversion == 0) /* Allow cleanup after a lease period. * store_release ensures cleanup will * see any newly revoked states if it * sees the time updated. */ nn->nfs40_last_revoke = ktime_get_boottime_seconds(); goto retry; } } } spin_unlock(&nn->client_lock); } static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; return sid->sequence % SESSION_HASH_SIZE; } #ifdef CONFIG_SUNRPC_DEBUG static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { u32 *ptr = (u32 *)(&sessionid->data[0]); dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); } #else static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { } #endif /* * Bump the seqid on cstate->replay_owner, and clear replay_owner if it * won't be used for replay. */ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) { struct nfs4_stateowner *so = cstate->replay_owner; if (nfserr == nfserr_replay_me) return; if (!seqid_mutating_err(ntohl(nfserr))) { nfsd4_cstate_clear_replay(cstate); return; } if (!so) return; if (so->so_is_open_owner) release_last_closed_stateid(openowner(so)); so->so_seqid++; return; } static void gen_sessionid(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_sessionid *sid; sid = (struct nfsd4_sessionid *)ses->se_sessionid.data; sid->clientid = clp->cl_clientid; sid->sequence = current_sessionid++; sid->reserved = 0; } /* * The protocol defines ca_maxresponssize_cached to include the size of * the rpc header, but all we need to cache is the data starting after * the end of the initial SEQUENCE operation--the rest we regenerate * each time. Therefore we can advertise a ca_maxresponssize_cached * value that is the number of bytes in our cache plus a few additional * bytes. In order to stay on the safe side, and not promise more than * we can cache, those additional bytes must be the minimum possible: 24 * bytes of rpc header (xid through accept state, with AUTH_NULL * verifier), 12 for the compound header (with zero-length tag), and 44 * for the SEQUENCE op response: */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) static struct shrinker *nfsd_slot_shrinker; static DEFINE_SPINLOCK(nfsd_session_list_lock); static LIST_HEAD(nfsd_session_list); /* The sum of "target_slots-1" on every session. The shrinker can push this * down, though it can take a little while for the memory to actually * be freed. The "-1" is because we can never free slot 0 while the * session is active. */ static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); static void free_session_slots(struct nfsd4_session *ses, int from) { int i; if (from >= ses->se_fchannel.maxreqs) return; for (i = from; i < ses->se_fchannel.maxreqs; i++) { struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); /* * Save the seqid in case we reactivate this slot. * This will never require a memory allocation so GFP * flag is irrelevant */ xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0); free_svc_cred(&slot->sl_cred); kfree(slot); } ses->se_fchannel.maxreqs = from; if (ses->se_target_maxslots > from) { int new_target = from ?: 1; atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); ses->se_target_maxslots = new_target; } } /** * reduce_session_slots - reduce the target max-slots of a session if possible * @ses: The session to affect * @dec: how much to decrease the target by * * This interface can be used by a shrinker to reduce the target max-slots * for a session so that some slots can eventually be freed. * It uses spin_trylock() as it may be called in a context where another * spinlock is held that has a dependency on client_lock. As shrinkers are * best-effort, skiping a session is client_lock is already held has no * great coast * * Return value: * The number of slots that the target was reduced by. */ static int reduce_session_slots(struct nfsd4_session *ses, int dec) { struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); int ret = 0; if (ses->se_target_maxslots <= 1) return ret; if (!spin_trylock(&nn->client_lock)) return ret; ret = min(dec, ses->se_target_maxslots-1); ses->se_target_maxslots -= ret; atomic_sub(ret, &nfsd_total_target_slots); ses->se_slot_gen += 1; if (ses->se_slot_gen == 0) { int i; ses->se_slot_gen = 1; for (i = 0; i < ses->se_fchannel.maxreqs; i++) { struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); slot->sl_generation = 0; } } spin_unlock(&nn->client_lock); return ret; } static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs, int index, gfp_t gfp) { struct nfsd4_slot *slot; size_t size; /* * The RPC and NFS session headers are never saved in * the slot reply cache buffer. */ size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ? 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; slot = kzalloc(struct_size(slot, sl_data, size), gfp); if (!slot) return NULL; slot->sl_index = index; return slot; } static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; struct nfsd4_session *new; struct nfsd4_slot *slot; int i; new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; xa_init(&new->se_slots); slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL); if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) goto out_free; for (i = 1; i < numslots; i++) { const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; slot = nfsd4_alloc_slot(fattrs, i, gfp); if (!slot) break; if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { kfree(slot); break; } } fattrs->maxreqs = i; memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); new->se_target_maxslots = i; atomic_add(i - 1, &nfsd_total_target_slots); new->se_cb_slot_avail = ~0U; new->se_cb_highest_slot = min(battrs->maxreqs - 1, NFSD_BC_SLOT_TABLE_SIZE - 1); spin_lock_init(&new->se_lock); return new; out_free: kfree(slot); xa_destroy(&new->se_slots); kfree(new); return NULL; } static void free_conn(struct nfsd4_conn *c) { svc_xprt_put(c->cn_xprt); kfree(c); } static void nfsd4_conn_lost(struct svc_xpt_user *u) { struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); struct nfs4_client *clp = c->cn_session->se_client; trace_nfsd_cb_lost(clp); spin_lock(&clp->cl_lock); if (!list_empty(&c->cn_persession)) { list_del(&c->cn_persession); free_conn(c); } nfsd4_probe_callback(clp); spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) { struct nfsd4_conn *conn; conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); if (!conn) return NULL; svc_xprt_get(rqstp->rq_xprt); conn->cn_xprt = rqstp->rq_xprt; conn->cn_flags = flags; INIT_LIST_HEAD(&conn->cn_xpt_user.list); return conn; } static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { conn->cn_session = ses; list_add(&conn->cn_persession, &ses->se_conns); } static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; spin_lock(&clp->cl_lock); __nfsd4_hash_conn(conn, ses); spin_unlock(&clp->cl_lock); } static int nfsd4_register_conn(struct nfsd4_conn *conn) { conn->cn_xpt_user.callback = nfsd4_conn_lost; return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); } static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses) { int ret; nfsd4_hash_conn(conn, ses); ret = nfsd4_register_conn(conn); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); /* We may have gained or lost a callback channel: */ nfsd4_probe_callback_sync(ses->se_client); } static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) { u32 dir = NFS4_CDFC4_FORE; if (cses->flags & SESSION4_BACK_CHAN) dir |= NFS4_CDFC4_BACK; return alloc_conn(rqstp, dir); } /* must be called under client_lock */ static void nfsd4_del_conns(struct nfsd4_session *s) { struct nfs4_client *clp = s->se_client; struct nfsd4_conn *c; spin_lock(&clp->cl_lock); while (!list_empty(&s->se_conns)) { c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); list_del_init(&c->cn_persession); spin_unlock(&clp->cl_lock); unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); free_conn(c); spin_lock(&clp->cl_lock); } spin_unlock(&clp->cl_lock); } static void __free_session(struct nfsd4_session *ses) { free_session_slots(ses, 0); xa_destroy(&ses->se_slots); kfree(ses); } static void free_session(struct nfsd4_session *ses) { nfsd4_del_conns(ses); __free_session(ses); } static unsigned long nfsd_slot_count(struct shrinker *s, struct shrink_control *sc) { unsigned long cnt = atomic_read(&nfsd_total_target_slots); return cnt ? cnt : SHRINK_EMPTY; } static unsigned long nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) { struct nfsd4_session *ses; unsigned long scanned = 0; unsigned long freed = 0; spin_lock(&nfsd_session_list_lock); list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { freed += reduce_session_slots(ses, 1); scanned += 1; if (scanned >= sc->nr_to_scan) { /* Move starting point for next scan */ list_move(&nfsd_session_list, &ses->se_all_sessions); break; } } spin_unlock(&nfsd_session_list_lock); sc->nr_scanned = scanned; return freed; } static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); new->se_client = clp; gen_sessionid(new); INIT_LIST_HEAD(&new->se_conns); atomic_set(&new->se_ref, 0); new->se_dead = false; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx) new->se_cb_seq_nr[idx] = 1; idx = hash_sessionid(&new->se_sessionid); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); spin_lock(&nfsd_session_list_lock); list_add_tail(&new->se_all_sessions, &nfsd_session_list); spin_unlock(&nfsd_session_list_lock); { struct sockaddr *sa = svc_addr(rqstp); /* * This is a little silly; with sessions there's no real * use for the callback address. Use the peer address * as a reasonable default for now, but consider fixing * the rpc client not to require an address in the * future: */ rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); } } /* caller must hold client_lock */ static struct nfsd4_session * __find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; struct nfsd_net *nn = net_generic(net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { if (!memcmp(elem->se_sessionid.data, sessionid->data, NFS4_MAX_SESSIONID_LEN)) { return elem; } } dprintk("%s: session not found\n", __func__); return NULL; } static struct nfsd4_session * find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net, __be32 *ret) { struct nfsd4_session *session; __be32 status = nfserr_badsession; session = __find_in_sessionid_hashtbl(sessionid, net); if (!session) goto out; status = nfsd4_get_session_locked(session); if (status) session = NULL; out: *ret = status; return session; } /* caller must hold client_lock */ static void unhash_session(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); list_del(&ses->se_hash); spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); spin_unlock(&ses->se_client->cl_lock); spin_lock(&nfsd_session_list_lock); list_del(&ses->se_all_sessions); spin_unlock(&nfsd_session_list_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) { /* * We're assuming the clid was not given out from a boot * precisely 2^32 (about 136 years) before this one. That seems * a safe assumption: */ if (clid->cl_boot == (u32)nn->boot_time) return 0; trace_nfsd_clid_stale(clid); return 1; } static struct nfs4_client *alloc_client(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client *clp; int i; if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients && atomic_read(&nn->nfsd_courtesy_clients) > 0) mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL); if (clp->cl_name.data == NULL) goto err_no_name; clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!clp->cl_ownerstr_hashtbl) goto err_no_hashtbl; clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); if (!clp->cl_callback_wq) goto err_no_callback_wq; for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; clp->cl_state = NFSD4_ACTIVE; atomic_inc(&nn->nfs4_client_count); atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); INIT_LIST_HEAD(&clp->cl_revoked); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&clp->cl_lo_states); #endif INIT_LIST_HEAD(&clp->async_copies); spin_lock_init(&clp->async_lock); spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; err_no_callback_wq: kfree(clp->cl_ownerstr_hashtbl); err_no_hashtbl: kfree(clp->cl_name.data); err_no_name: kmem_cache_free(client_slab, clp); return NULL; } static void __free_client(struct kref *k) { struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref); struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); free_svc_cred(&clp->cl_cred); destroy_workqueue(clp->cl_callback_wq); kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } static void drop_client(struct nfs4_client *clp) { kref_put(&clp->cl_nfsdfs.cl_ref, __free_client); } static void free_client(struct nfs4_client *clp) { while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); list_del(&ses->se_perclnt); WARN_ON_ONCE(atomic_read(&ses->se_ref)); free_session(ses); } rpc_destroy_wait_queue(&clp->cl_cb_waitq); if (clp->cl_nfsd_dentry) { nfsd_client_rmdir(clp->cl_nfsd_dentry); clp->cl_nfsd_dentry = NULL; wake_up_all(&expiry_wq); } drop_client(clp); } /* must be called under the client_lock */ static void unhash_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_session *ses; lockdep_assert_held(&nn->client_lock); /* Mark the client as expired! */ clp->cl_time = 0; /* Make it invisible */ if (!list_empty(&clp->cl_idhash)) { list_del_init(&clp->cl_idhash); if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) rb_erase(&clp->cl_namenode, &nn->conf_name_tree); else rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); } list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); spin_lock(&nfsd_session_list_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { list_del_init(&ses->se_hash); list_del_init(&ses->se_all_sessions); } spin_unlock(&nfsd_session_list_lock); spin_unlock(&clp->cl_lock); } static void unhash_client(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); spin_lock(&nn->client_lock); unhash_client_locked(clp); spin_unlock(&nn->client_lock); } static __be32 mark_client_expired_locked(struct nfs4_client *clp) { int users = atomic_read(&clp->cl_rpc_users); trace_nfsd_mark_client_expired(clp, users); if (users) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; } static void __destroy_client(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; LIST_HEAD(reaplist); spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); unhash_delegation_locked(dp, SC_STATUS_CLOSED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); destroy_unhashed_deleg(dp); } while (!list_empty(&clp->cl_revoked)) { dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); nfs4_get_stateowner(&oo->oo_owner); release_openowner(oo); } for (i = 0; i < OWNER_HASH_SIZE; i++) { struct nfs4_stateowner *so, *tmp; list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i], so_strhash) { /* Should be no openowners at this point */ WARN_ON_ONCE(so->so_is_open_owner); remove_blocked_locks(lockowner(so)); } } nfsd4_return_all_client_layouts(clp); nfsd4_shutdown_copy(clp); nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); atomic_add_unless(&nn->nfs4_client_count, -1, 0); nfsd4_dec_courtesy_client_count(nn, clp); free_client(clp); wake_up_all(&expiry_wq); } static void destroy_client(struct nfs4_client *clp) { unhash_client(clp); __destroy_client(clp); } static void inc_reclaim_complete(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (!nn->track_reclaim_completes) return; if (!nfsd4_find_reclaim_client(clp->cl_name, nn)) return; if (atomic_inc_return(&nn->nr_reclaim_complete) == nn->reclaim_str_hashtbl_size) { printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", clp->net->ns.inum); nfsd4_end_grace(nn); } } static void expire_client(struct nfs4_client *clp) { unhash_client(clp); nfsd4_client_record_remove(clp); __destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) { memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data)); } static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) { target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; target->cl_clientid.cl_id = source->cl_clientid.cl_id; } static int copy_cred(struct svc_cred *target, struct svc_cred *source) { target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL); target->cr_raw_principal = kstrdup(source->cr_raw_principal, GFP_KERNEL); target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL); if ((source->cr_principal && !target->cr_principal) || (source->cr_raw_principal && !target->cr_raw_principal) || (source->cr_targ_princ && !target->cr_targ_princ)) return -ENOMEM; target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); target->cr_gss_mech = source->cr_gss_mech; if (source->cr_gss_mech) gss_mech_get(source->cr_gss_mech); return 0; } static int compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) { if (o1->len < o2->len) return -1; if (o1->len > o2->len) return 1; return memcmp(o1->data, o2->data, o1->len); } static int same_verf(nfs4_verifier *v1, nfs4_verifier *v2) { return 0 == memcmp(v1->data, v2->data, sizeof(v1->data)); } static int same_clid(clientid_t *cl1, clientid_t *cl2) { return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); } static bool groups_equal(struct group_info *g1, struct group_info *g2) { int i; if (g1->ngroups != g2->ngroups) return false; for (i=0; i<g1->ngroups; i++) if (!gid_eq(g1->gid[i], g2->gid[i])) return false; return true; } /* * RFC 3530 language requires clid_inuse be returned when the * "principal" associated with a requests differs from that previously * used. We use uid, gid's, and gss principal string as our best * approximation. We also don't want to allow non-gss use of a client * established using gss: in theory cr_principal should catch that * change, but in practice cr_principal can be null even in the gss case * since gssd doesn't always pass down a principal string. */ static bool is_gss_cred(struct svc_cred *cr) { /* Is cr_flavor one of the gss "pseudoflavors"?: */ return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR); } static bool same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { if ((is_gss_cred(cr1) != is_gss_cred(cr2)) || (!uid_eq(cr1->cr_uid, cr2->cr_uid)) || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) return false; /* XXX: check that cr_targ_princ fields match ? */ if (cr1->cr_principal == cr2->cr_principal) return true; if (!cr1->cr_principal || !cr2->cr_principal) return false; return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) { struct svc_cred *cr = &rqstp->rq_cred; u32 service; if (!cr->cr_gss_mech) return false; service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); return service == RPC_GSS_SVC_INTEGRITY || service == RPC_GSS_SVC_PRIVACY; } bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) { struct svc_cred *cr = &rqstp->rq_cred; if (!cl->cl_mach_cred) return true; if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) return false; if (!svc_rqst_integrity_protected(rqstp)) return false; if (cl->cl_cred.cr_raw_principal) return 0 == strcmp(cl->cl_cred.cr_raw_principal, cr->cr_raw_principal); if (!cr->cr_principal) return false; return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); } static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) { __be32 verf[2]; /* * This is opaque to client, so no need to byte-swap. Use * __force to keep sparse happy */ verf[0] = (__force __be32)(u32)ktime_get_real_seconds(); verf[1] = (__force __be32)nn->clverifier_counter++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { clp->cl_clientid.cl_boot = (u32)nn->boot_time; clp->cl_clientid.cl_id = nn->clientid_counter++; gen_confirm(clp, nn); } static struct nfs4_stid * find_stateid_locked(struct nfs4_client *cl, stateid_t *t) { struct nfs4_stid *ret; ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); if (!ret || !ret->sc_type) return NULL; return ret; } static struct nfs4_stid * find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, unsigned short typemask, unsigned short ok_states) { struct nfs4_stid *s; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, t); if (s != NULL) { if ((s->sc_status & ~ok_states) == 0 && (typemask & s->sc_type)) refcount_inc(&s->sc_count); else s = NULL; } spin_unlock(&cl->cl_lock); return s; } static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) { struct nfsdfs_client *nc; nc = get_nfsdfs_client(inode); if (!nc) return NULL; return container_of(nc, struct nfs4_client, cl_nfsdfs); } static void seq_quote_mem(struct seq_file *m, char *data, int len) { seq_puts(m, "\""); seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\"); seq_puts(m, "\""); } static const char *cb_state2str(int state) { switch (state) { case NFSD4_CB_UP: return "UP"; case NFSD4_CB_UNKNOWN: return "UNKNOWN"; case NFSD4_CB_DOWN: return "DOWN"; case NFSD4_CB_FAULT: return "FAULT"; } return "UNDEFINED"; } static int client_info_show(struct seq_file *m, void *v) { struct inode *inode = file_inode(m->file); struct nfsd4_session *ses; struct nfs4_client *clp; u64 clid; clp = get_nfsdfs_clp(inode); if (!clp) return -ENXIO; memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); if (clp->cl_state == NFSD4_COURTESY) seq_puts(m, "status: courtesy\n"); else if (clp->cl_state == NFSD4_EXPIRABLE) seq_puts(m, "status: expirable\n"); else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) seq_puts(m, "status: confirmed\n"); else seq_puts(m, "status: unconfirmed\n"); seq_printf(m, "seconds from last renew: %lld\n", ktime_get_boottime_seconds() - clp->cl_time); seq_puts(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); if (clp->cl_nii_domain.data) { seq_puts(m, "Implementation domain: "); seq_quote_mem(m, clp->cl_nii_domain.data, clp->cl_nii_domain.len); seq_puts(m, "\nImplementation name: "); seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state)); seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr); seq_printf(m, "admin-revoked states: %d\n", atomic_read(&clp->cl_admin_revoked)); spin_lock(&clp->cl_lock); seq_printf(m, "session slots:"); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) seq_printf(m, " %u", ses->se_fchannel.maxreqs); seq_printf(m, "\nsession target slots:"); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) seq_printf(m, " %u", ses->se_target_maxslots); spin_unlock(&clp->cl_lock); seq_puts(m, "\n"); drop_client(clp); return 0; } DEFINE_SHOW_ATTRIBUTE(client_info); static void *states_start(struct seq_file *s, loff_t *pos) __acquires(&clp->cl_lock) { struct nfs4_client *clp = s->private; unsigned long id = *pos; void *ret; spin_lock(&clp->cl_lock); ret = idr_get_next_ul(&clp->cl_stateids, &id); *pos = id; return ret; } static void *states_next(struct seq_file *s, void *v, loff_t *pos) { struct nfs4_client *clp = s->private; unsigned long id = *pos; void *ret; id = *pos; id++; ret = idr_get_next_ul(&clp->cl_stateids, &id); *pos = id; return ret; } static void states_stop(struct seq_file *s, void *v) __releases(&clp->cl_lock) { struct nfs4_client *clp = s->private; spin_unlock(&clp->cl_lock); } static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) { seq_printf(s, "filename: \"%pD2\"", f->nf_file); } static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { struct inode *inode = file_inode(f->nf_file); seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) { seq_puts(s, "owner: "); seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); } static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid) { seq_printf(s, "0x%.8x", stid->si_generation); seq_printf(s, "%12phN", &stid->si_opaque); } static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_ol_stateid *ols; struct nfs4_file *nf; struct nfsd_file *file; struct nfs4_stateowner *oo; unsigned int access, deny; ols = openlockstateid(st); oo = ols->st_stateowner; nf = st->sc_file; seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: open, "); access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); seq_printf(s, "access: %s%s, ", access & NFS4_SHARE_ACCESS_READ ? "r" : "-", access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); seq_printf(s, "deny: %s%s, ", deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); if (nf) { spin_lock(&nf->fi_lock); file = find_any_file_locked(nf); if (file) { nfs4_show_superblock(s, file); seq_puts(s, ", "); nfs4_show_fname(s, file); seq_puts(s, ", "); } spin_unlock(&nf->fi_lock); } else seq_puts(s, "closed, "); nfs4_show_owner(s, oo); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); seq_puts(s, " }\n"); return 0; } static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_ol_stateid *ols; struct nfs4_file *nf; struct nfsd_file *file; struct nfs4_stateowner *oo; ols = openlockstateid(st); oo = ols->st_stateowner; nf = st->sc_file; seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: lock, "); spin_lock(&nf->fi_lock); file = find_any_file_locked(nf); if (file) { /* * Note: a lock stateid isn't really the same thing as a lock, * it's the locking state held by one owner on a file, and there * may be multiple (or no) lock ranges associated with it. * (Same for the matter is true of open stateids.) */ nfs4_show_superblock(s, file); /* XXX: open stateid? */ seq_puts(s, ", "); nfs4_show_fname(s, file); seq_puts(s, ", "); } nfs4_show_owner(s, oo); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); seq_puts(s, " }\n"); spin_unlock(&nf->fi_lock); return 0; } static char *nfs4_show_deleg_type(u32 dl_type) { switch (dl_type) { case OPEN_DELEGATE_READ: return "r"; case OPEN_DELEGATE_WRITE: return "w"; case OPEN_DELEGATE_READ_ATTRS_DELEG: return "ra"; case OPEN_DELEGATE_WRITE_ATTRS_DELEG: return "wa"; } return "?"; } static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_delegation *ds; struct nfs4_file *nf; struct nfsd_file *file; ds = delegstateid(st); nf = st->sc_file; seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: deleg, "); seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type)); /* XXX: lease time, whether it's being recalled. */ spin_lock(&nf->fi_lock); file = nf->fi_deleg_file; if (file) { seq_puts(s, ", "); nfs4_show_superblock(s, file); seq_puts(s, ", "); nfs4_show_fname(s, file); } spin_unlock(&nf->fi_lock); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); seq_puts(s, " }\n"); return 0; } static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_layout_stateid *ls; struct nfsd_file *file; ls = container_of(st, struct nfs4_layout_stateid, ls_stid); seq_puts(s, "- "); nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: layout"); /* XXX: What else would be useful? */ spin_lock(&ls->ls_stid.sc_file->fi_lock); file = ls->ls_file; if (file) { seq_puts(s, ", "); nfs4_show_superblock(s, file); seq_puts(s, ", "); nfs4_show_fname(s, file); } spin_unlock(&ls->ls_stid.sc_file->fi_lock); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); seq_puts(s, " }\n"); return 0; } static int states_show(struct seq_file *s, void *v) { struct nfs4_stid *st = v; switch (st->sc_type) { case SC_TYPE_OPEN: return nfs4_show_open(s, st); case SC_TYPE_LOCK: return nfs4_show_lock(s, st); case SC_TYPE_DELEG: return nfs4_show_deleg(s, st); case SC_TYPE_LAYOUT: return nfs4_show_layout(s, st); default: return 0; /* XXX: or SEQ_SKIP? */ } /* XXX: copy stateids? */ } static struct seq_operations states_seq_ops = { .start = states_start, .next = states_next, .stop = states_stop, .show = states_show }; static int client_states_open(struct inode *inode, struct file *file) { struct seq_file *s; struct nfs4_client *clp; int ret; clp = get_nfsdfs_clp(inode); if (!clp) return -ENXIO; ret = seq_open(file, &states_seq_ops); if (ret) return ret; s = file->private_data; s->private = clp; return 0; } static int client_opens_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct nfs4_client *clp = m->private; /* XXX: alternatively, we could get/drop in seq start/stop */ drop_client(clp); return seq_release(inode, file); } static const struct file_operations client_states_fops = { .open = client_states_open, .read = seq_read, .llseek = seq_lseek, .release = client_opens_release, }; /* * Normally we refuse to destroy clients that are in use, but here the * administrator is telling us to just do it. We also want to wait * so the caller has a guarantee that the client's locks are gone by * the time the write returns: */ static void force_expire_client(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired; trace_nfsd_clid_admin_expired(&clp->cl_clientid); spin_lock(&nn->client_lock); clp->cl_time = 0; spin_unlock(&nn->client_lock); wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); spin_lock(&nn->client_lock); already_expired = list_empty(&clp->cl_lru); if (!already_expired) unhash_client_locked(clp); spin_unlock(&nn->client_lock); if (!already_expired) expire_client(clp); else wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL); } static ssize_t client_ctl_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { char *data; struct nfs4_client *clp; data = simple_transaction_get(file, buf, size); if (IS_ERR(data)) return PTR_ERR(data); if (size != 7 || 0 != memcmp(data, "expire\n", 7)) return -EINVAL; clp = get_nfsdfs_clp(file_inode(file)); if (!clp) return -ENXIO; force_expire_client(clp); drop_client(clp); return 7; } static const struct file_operations client_ctl_fops = { .write = client_ctl_write, .release = simple_transaction_release, }; static const struct tree_descr client_files[] = { [0] = {"info", &client_info_fops, S_IRUSR}, [1] = {"states", &client_states_fops, S_IRUSR}, [2] = {"ctl", &client_ctl_fops, S_IWUSR}, [3] = {""}, }; static int nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, struct rpc_task *task) { trace_nfsd_cb_recall_any_done(cb, task); switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 2 * HZ); return 0; default: return 1; } } static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; drop_client(clp); } static int nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_cb_fattr *ncf = container_of(cb, struct nfs4_cb_fattr, ncf_getattr); struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task); ncf->ncf_cb_status = task->tk_status; switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 2 * HZ); return 0; default: return 1; } } static void nfsd4_cb_getattr_release(struct nfsd4_callback *cb) { struct nfs4_cb_fattr *ncf = container_of(cb, struct nfs4_cb_fattr, ncf_getattr); struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); nfs4_put_stid(&dp->dl_stid); } static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { .done = nfsd4_cb_recall_any_done, .release = nfsd4_cb_recall_any_release, .opcode = OP_CB_RECALL_ANY, }; static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = { .done = nfsd4_cb_getattr_done, .release = nfsd4_cb_getattr_release, .opcode = OP_CB_GETATTR, }; static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) { struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) return; /* set to proper status when nfsd4_cb_getattr_done runs */ ncf->ncf_cb_status = NFS4ERR_IO; /* ensure that wake_bit is done when RUNNING is cleared */ set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags); refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&ncf->ncf_getattr); } static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { struct nfs4_client *clp; struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct dentry *dentries[ARRAY_SIZE(client_files)]; clp = alloc_client(name, nn); if (clp == NULL) return NULL; ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { free_client(clp); return NULL; } gen_clid(clp, nn); kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = ktime_get_boottime_seconds(); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; clp->cl_nfsd_dentry = nfsd_client_mkdir( nn, &clp->cl_nfsdfs, clp->cl_clientid.cl_id - nn->clientid_base, client_files, dentries); clp->cl_nfsd_info_dentry = dentries[0]; if (!clp->cl_nfsd_dentry) { free_client(clp); return NULL; } clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); if (!clp->cl_ra) { free_client(clp); return NULL; } clp->cl_ra_time = 0; nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } static void add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) { struct rb_node **new = &(root->rb_node), *parent = NULL; struct nfs4_client *clp; while (*new) { clp = rb_entry(*new, struct nfs4_client, cl_namenode); parent = *new; if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0) new = &((*new)->rb_left); else new = &((*new)->rb_right); } rb_link_node(&new_clp->cl_namenode, parent, new); rb_insert_color(&new_clp->cl_namenode, root); } static struct nfs4_client * find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) { int cmp; struct rb_node *node = root->rb_node; struct nfs4_client *clp; while (node) { clp = rb_entry(node, struct nfs4_client, cl_namenode); cmp = compare_blob(&clp->cl_name, name); if (cmp > 0) node = node->rb_left; else if (cmp < 0) node = node->rb_right; else return clp; } return NULL; } static void add_to_unconfirmed(struct nfs4_client *clp) { unsigned int idhashval; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); renew_client_locked(clp); } static void move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); trace_nfsd_clid_confirmed(&clp->cl_clientid); renew_client_locked(clp); } static struct nfs4_client * find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); list_for_each_entry(clp, &tbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; renew_client_locked(clp); return clp; } } return NULL; } static struct nfs4_client * find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->conf_id_hashtbl; lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } static struct nfs4_client * find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->unconf_id_hashtbl; lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } static bool clp_used_exchangeid(struct nfs4_client *clp) { return clp->cl_exchange_flags != 0; } static struct nfs4_client * find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->unconf_name_tree); } static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) { struct nfs4_cb_conn *conn = &clp->cl_cb_conn; struct sockaddr *sa = svc_addr(rqstp); u32 scopeid = rpc_get_scope_id(sa); unsigned short expected_family; /* Currently, we only support tcp and tcp6 for the callback channel */ if (se->se_callback_netid_len == 3 && !memcmp(se->se_callback_netid_val, "tcp", 3)) expected_family = AF_INET; else if (se->se_callback_netid_len == 4 && !memcmp(se->se_callback_netid_val, "tcp6", 4)) expected_family = AF_INET6; else goto out_err; conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family) goto out_err; if (conn->cb_addr.ss_family == AF_INET6) ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); trace_nfsd_cb_args(clp, conn); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; trace_nfsd_cb_nodelegs(clp); return; } /* * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct xdr_buf *buf = resp->xdr->buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; dprintk("--> %s slot %p\n", __func__, slot); slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; free_svc_cred(&slot->sl_cred); copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); if (!nfsd4_cache_this(resp)) { slot->sl_flags &= ~NFSD4_SLOT_CACHED; return; } slot->sl_flags |= NFSD4_SLOT_CACHED; base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) WARN(1, "%s: sessions DRC could not cache compound\n", __func__); return; } /* * Encode the replay sequence operation from the slot values. * If cachethis is FALSE encode the uncached rep error on the next * operation which sets resp->p and increments resp->opcnt for * nfs4svc_encode_compoundres. * */ static __be32 nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, struct nfsd4_compoundres *resp) { struct nfsd4_op *op; struct nfsd4_slot *slot = resp->cstate.slot; /* Encode the replayed sequence operation */ op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); if (slot->sl_flags & NFSD4_SLOT_CACHED) return op->status; if (args->opcnt == 1) { /* * The original operation wasn't a solo sequence--we * always cache those--so this retry must not match the * original: */ op->status = nfserr_seq_false_retry; } else { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); } return op->status; } /* * The sequence operation is not cached because we can use the slot and * session values. */ static __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; struct xdr_stream *xdr = resp->xdr; __be32 *p; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); if (status) return status; p = xdr_reserve_space(xdr, slot->sl_datalen); if (!p) { WARN_ON_ONCE(1); return nfserr_serverfault; } xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); xdr_commit_encode(xdr); resp->opcnt = slot->sl_opcnt; return slot->sl_status; } /* * Set the exchange_id flags returned by the server. */ static void nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) { #ifdef CONFIG_NFSD_PNFS new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS; #else new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; #endif /* Referrals are supported, Migration is not. */ new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; /* set the wire flags to return to client. */ clid->flags = new->cl_exchange_flags; } static bool client_has_openowners(struct nfs4_client *clp) { struct nfs4_openowner *oo; list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) { if (!list_empty(&oo->oo_owner.so_stateids)) return true; } return false; } static bool client_has_state(struct nfs4_client *clp) { return client_has_openowners(clp) #ifdef CONFIG_NFSD_PNFS || !list_empty(&clp->cl_lo_states) #endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions) || nfsd4_has_active_async_copies(clp); } static __be32 copy_impl_id(struct nfs4_client *clp, struct nfsd4_exchange_id *exid) { if (!exid->nii_domain.data) return 0; xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL); if (!clp->cl_nii_domain.data) return nfserr_jukebox; xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); if (!clp->cl_nii_name.data) return nfserr_jukebox; clp->cl_nii_time = exid->nii_time; return 0; } __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_exchange_id *exid = &u->exchange_id; struct nfs4_client *conf, *new; struct nfs4_client *unconf = NULL; __be32 status; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; struct sockaddr *sa = svc_addr(rqstp); bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " "ip_addr=%s flags %x, spa_how %u\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s", utsname()->sysname, utsname()->release, utsname()->version, utsname()->machine); if (!exid->server_impl_name) return nfserr_jukebox; if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; new = create_client(exid->clname, rqstp, &verf); if (new == NULL) return nfserr_jukebox; status = copy_impl_id(new, exid); if (status) goto out_nolock; switch (exid->spa_how) { case SP4_MACH_CRED: exid->spo_must_enforce[0] = 0; exid->spo_must_enforce[1] = ( 1 << (OP_BIND_CONN_TO_SESSION - 32) | 1 << (OP_EXCHANGE_ID - 32) | 1 << (OP_CREATE_SESSION - 32) | 1 << (OP_DESTROY_SESSION - 32) | 1 << (OP_DESTROY_CLIENTID - 32)); exid->spo_must_allow[0] &= (1 << (OP_CLOSE) | 1 << (OP_OPEN_DOWNGRADE) | 1 << (OP_LOCKU) | 1 << (OP_DELEGRETURN)); exid->spo_must_allow[1] &= ( 1 << (OP_TEST_STATEID - 32) | 1 << (OP_FREE_STATEID - 32)); if (!svc_rqst_integrity_protected(rqstp)) { status = nfserr_inval; goto out_nolock; } /* * Sometimes userspace doesn't give us a principal. * Which is a bug, really. Anyway, we can't enforce * MACH_CRED in that case, better to give up now: */ if (!new->cl_cred.cr_principal && !new->cl_cred.cr_raw_principal) { status = nfserr_serverfault; goto out_nolock; } new->cl_mach_cred = true; break; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); fallthrough; case SP4_SSV: status = nfserr_encr_alg_unsupp; goto out_nolock; } /* Cases below refer to rfc 5661 section 18.35.4: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); bool verfs_match = same_verf(&verf, &conf->cl_verifier); if (update) { if (!clp_used_exchangeid(conf)) { /* buggy client */ status = nfserr_inval; goto out; } if (!nfsd4_mach_creds_match(conf, rqstp)) { status = nfserr_wrong_cred; goto out; } if (!creds_match) { /* case 9 */ status = nfserr_perm; goto out; } if (!verfs_match) { /* case 8 */ status = nfserr_not_same; goto out; } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; trace_nfsd_clid_confirmed_r(conf); goto out_copy; } if (!creds_match) { /* case 3 */ if (client_has_state(conf)) { status = nfserr_clid_inuse; trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } goto out_new; } if (verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; trace_nfsd_clid_confirmed_r(conf); goto out_copy; } /* case 5, client reboot */ trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf); conf = NULL; goto out_new; } if (update) { /* case 7 */ status = nfserr_noent; goto out; } unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ unhash_client_locked(unconf); /* case 1, new owner ID */ trace_nfsd_clid_fresh(new); out_new: if (conf) { status = mark_client_expired_locked(conf); if (status) goto out; trace_nfsd_clid_replaced(&conf->cl_clientid); } new->cl_minorversion = cstate->minorversion; new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1]; /* Contrived initial CREATE_SESSION response */ new->cl_cs_slot.sl_status = nfserr_seq_misordered; add_to_unconfirmed(new); swap(new, conf); out_copy: exid->clientid.cl_boot = conf->cl_clientid.cl_boot; exid->clientid.cl_id = conf->cl_clientid.cl_id; exid->seqid = conf->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(conf, exid); exid->nii_domain.len = sizeof("kernel.org") - 1; exid->nii_domain.data = "kernel.org"; /* * Note that RFC 8881 places no length limit on * nii_name, but this implementation permits no * more than NFS4_OPAQUE_LIMIT bytes. */ exid->nii_name.len = strlen(exid->server_impl_name); if (exid->nii_name.len > NFS4_OPAQUE_LIMIT) exid->nii_name.len = NFS4_OPAQUE_LIMIT; exid->nii_name.data = exid->server_impl_name; /* just send zeros - the date is in nii_name */ exid->nii_time.tv_sec = 0; exid->nii_time.tv_nsec = 0; dprintk("nfsd4_exchange_id seqid %d flags %x\n", conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; out: spin_unlock(&nn->client_lock); out_nolock: if (new) expire_client(new); if (unconf) { trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); expire_client(unconf); } return status; } void nfsd4_exchange_id_release(union nfsd4_op_u *u) { struct nfsd4_exchange_id *exid = &u->exchange_id; kfree(exid->server_impl_name); } static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags) { /* The slot is in use, and no response has been sent. */ if (flags & NFSD4_SLOT_INUSE) { if (seqid == slot_seqid) return nfserr_jukebox; else return nfserr_seq_misordered; } /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; if ((flags & NFSD4_SLOT_REUSED) && seqid == 1) return nfs_ok; if (seqid == slot_seqid) return nfserr_replay_cache; return nfserr_seq_misordered; } /* * Cache the create session result into the create session single DRC * slot cache by saving the xdr structure. sl_seqid has been set. * Do this for solo or embedded create session operations. */ static void nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, struct nfsd4_clid_slot *slot, __be32 nfserr) { slot->sl_status = nfserr; memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); } static __be32 nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, struct nfsd4_clid_slot *slot) { memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); return slot->sl_status; } #define NFSD_MIN_REQ_HDR_SEQ_SZ ((\ 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \ 1 + /* MIN tag is length with zero, only length */ \ 3 + /* version, opcount, opcode */ \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ /* seqid, slotID, slotID, cache */ \ 4 ) * sizeof(__be32)) #define NFSD_MIN_RESP_HDR_SEQ_SZ ((\ 2 + /* verifier: AUTH_NULL, length 0 */\ 1 + /* status */ \ 1 + /* MIN tag is length with zero, only length */ \ 3 + /* opcount, opcode, opstatus*/ \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) { u32 maxrpc = nn->nfsd_serv->sv_max_mesg; if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) return nfserr_toosmall; ca->headerpadsz = 0; ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); return nfs_ok; } /* * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now. * These are based on similar macros in linux/sunrpc/msg_prot.h . */ #define RPC_MAX_HEADER_WITH_AUTH_SYS \ (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK)) #define RPC_MAX_REPHEADER_WITH_AUTH_SYS \ (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK)) #define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32)) #define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \ sizeof(__be32)) static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) { ca->headerpadsz = 0; if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) return nfserr_toosmall; ca->maxresp_cached = 0; if (ca->maxops < 2) return nfserr_toosmall; return nfs_ok; } static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) { switch (cbs->flavor) { case RPC_AUTH_NULL: case RPC_AUTH_UNIX: return nfs_ok; default: /* * GSS case: the spec doesn't allow us to return this * error. But it also doesn't allow us not to support * GSS. * I'd rather this fail hard than return some error the * client might think it can already handle: */ return nfserr_encr_alg_unsupp; } } __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create_session *cr_ses = &u->create_session; struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot; struct nfs4_client *old = NULL; struct nfsd4_session *new; struct nfsd4_conn *conn; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; status = nfsd4_check_cb_sec(&cr_ses->cb_sec); if (status) return status; status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; status = check_backchannel_attrs(&cr_ses->back_channel); if (status) goto out_err; status = nfserr_jukebox; new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); if (!new) goto out_err; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; spin_lock(&nn->client_lock); /* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */ unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); if (!conf && !unconf) { status = nfserr_stale_clientid; goto out_free_conn; } /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */ if (conf) { cs_slot = &conf->cl_cs_slot; trace_nfsd_slot_seqid_conf(conf, cr_ses); } else { cs_slot = &unconf->cl_cs_slot; trace_nfsd_slot_seqid_unconf(unconf, cr_ses); } status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); switch (status) { case nfs_ok: cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; break; case nfserr_replay_cache: status = nfsd4_replay_create_session(cr_ses, cs_slot); fallthrough; case nfserr_jukebox: /* The server MUST NOT cache NFS4ERR_DELAY */ goto out_free_conn; default: goto out_cache_error; } /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) { status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(conf, rqstp)) goto out_cache_error; } else { status = nfserr_clid_inuse; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out_cache_error; } status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(unconf, rqstp)) goto out_cache_error; old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); if (status) goto out_expired_error; trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); conf = unconf; } /* RFC 8881 Section 18.36.4 Phase 4: Session creation. */ status = nfs_ok; /* Persistent sessions are not supported */ cr_ses->flags &= ~SESSION4_PERSIST; /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; /* Report the correct number of backchannel slots */ cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1; init_session(rqstp, new, conf, cr_ses); nfsd4_get_session_locked(new); memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); spin_unlock(&nn->client_lock); if (conf == unconf) fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); /* init connection and backchannel */ nfsd4_init_conn(rqstp, conn, new); nfsd4_put_session(new); if (old) expire_client(old); return status; out_expired_error: /* * Revert the slot seq_nr change so the server will process * the client's resend instead of returning a cached response. */ if (status == nfserr_jukebox) { cs_slot->sl_seqid--; cr_ses->seqid = cs_slot->sl_seqid; goto out_free_conn; } out_cache_error: nfsd4_cache_create_session(cr_ses, cs_slot, status); out_free_conn: spin_unlock(&nn->client_lock); free_conn(conn); out_free_session: __free_session(new); out_err: return status; } static __be32 nfsd4_map_bcts_dir(u32 *dir) { switch (*dir) { case NFS4_CDFC4_FORE: case NFS4_CDFC4_BACK: return nfs_ok; case NFS4_CDFC4_FORE_OR_BOTH: case NFS4_CDFC4_BACK_OR_BOTH: *dir = NFS4_CDFC4_BOTH; return nfs_ok; } return nfserr_inval; } __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; struct nfsd4_session *session = cstate->session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; status = nfsd4_check_cb_sec(&bc->bc_cb_sec); if (status) return status; spin_lock(&nn->client_lock); session->se_cb_prog = bc->bc_cb_program; session->se_cb_sec = bc->bc_cb_sec; spin_unlock(&nn->client_lock); nfsd4_probe_callback(session->se_client); return nfs_ok; } static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) { struct nfsd4_conn *c; list_for_each_entry(c, &s->se_conns, cn_persession) { if (c->cn_xprt == xpt) { return c; } } return NULL; } static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn) { struct nfs4_client *clp = session->se_client; struct svc_xprt *xpt = rqst->rq_xprt; struct nfsd4_conn *c; __be32 status; /* Following the last paragraph of RFC 5661 Section 18.34.3: */ spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(xpt, session); if (!c) status = nfserr_noent; else if (req == c->cn_flags) status = nfs_ok; else if (req == NFS4_CDFC4_FORE_OR_BOTH && c->cn_flags != NFS4_CDFC4_BACK) status = nfs_ok; else if (req == NFS4_CDFC4_BACK_OR_BOTH && c->cn_flags != NFS4_CDFC4_FORE) status = nfs_ok; else status = nfserr_inval; spin_unlock(&clp->cl_lock); if (status == nfs_ok && conn) *conn = c; return status; } __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; __be32 status; struct nfsd4_conn *conn; struct nfsd4_session *session; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; spin_lock(&nn->client_lock); session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status); spin_unlock(&nn->client_lock); if (!session) goto out_no_session; status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; status = nfsd4_match_existing_connection(rqstp, session, bcts->dir, &conn); if (status == nfs_ok) { if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH || bcts->dir == NFS4_CDFC4_BACK) conn->cn_flags |= NFS4_CDFC4_BACK; nfsd4_probe_callback(session->se_client); goto out; } if (status == nfserr_inval) goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; conn = alloc_conn(rqstp, bcts->dir); status = nfserr_jukebox; if (!conn) goto out; nfsd4_init_conn(rqstp, conn, session); status = nfs_ok; out: nfsd4_put_session(session); out_no_session: return status; } static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid) { if (!cstate->session) return false; return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid)); } __be32 nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid; struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; struct net *net = SVC_NET(r); struct nfsd_net *nn = net_generic(net, nfsd_net_id); status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate, sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; ref_held_by_me++; } dump_sessionid(__func__, sessionid); spin_lock(&nn->client_lock); ses = find_in_sessionid_hashtbl(sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(ses->se_client, r)) goto out_put_session; status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) goto out_put_session; unhash_session(ses); spin_unlock(&nn->client_lock); nfsd4_probe_callback_sync(ses->se_client); spin_lock(&nn->client_lock); status = nfs_ok; out_put_session: nfsd4_put_session_locked(ses); out_client_lock: spin_unlock(&nn->client_lock); out: return status; } static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); if (c) goto out_free; status = nfserr_conn_not_bound_to_session; if (clp->cl_mach_cred) goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); return nfs_ok; out_free: spin_unlock(&clp->cl_lock); free_conn(new); return status; } static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) { struct nfsd4_compoundargs *args = rqstp->rq_argp; return args->opcnt > session->se_fchannel.maxops; } static bool nfsd4_request_too_big(struct svc_rqst *rqstp, struct nfsd4_session *session) { struct xdr_buf *xb = &rqstp->rq_arg; return xb->len > session->se_fchannel.maxreq_sz; } static bool replay_matches_cache(struct svc_rqst *rqstp, struct nfsd4_sequence *seq, struct nfsd4_slot *slot) { struct nfsd4_compoundargs *argp = rqstp->rq_argp; if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) != (bool)seq->cachethis) return false; /* * If there's an error then the reply can have fewer ops than * the call. */ if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) return false; /* * But if we cached a reply with *more* ops than the call you're * sending us now, then this new call is clearly not really a * replay of the old one: */ if (slot->sl_opcnt > argp->opcnt) return false; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) return false; /* * There may be more comparisons we could actually do, but the * spec doesn't require us to catch every case where the calls * don't match (that would require caching the call as well as * the reply), so we don't bother. */ return true; } __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_sequence *seq = &u->sequence; struct nfsd4_compoundres *resp = rqstp->rq_resp; struct xdr_stream *xdr = resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; int buflen; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; /* * Will be either used or freed by nfsd4_sequence_check_conn * below. */ conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); if (!conn) return nfserr_jukebox; spin_lock(&nn->client_lock); session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status); if (!session) goto out_no_session; clp = session->se_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) goto out_put_session; status = nfserr_req_too_big; if (nfsd4_request_too_big(rqstp, session)) goto out_put_session; status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out_put_session; slot = xa_load(&session->se_slots, seq->slotid); dprintk("%s: slotid %d\n", __func__, seq->slotid); trace_nfsd_slot_seqid_sequence(clp, seq, slot); status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) goto out_put_session; status = nfserr_seq_false_retry; if (!replay_matches_cache(rqstp, seq, slot)) goto out_put_session; cstate->slot = slot; cstate->session = session; cstate->clp = clp; /* Return the cached reply status and set cstate->status * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); cstate->status = nfserr_replay_cache; goto out; } if (status) goto out_put_session; status = nfsd4_sequence_check_conn(conn, session); conn = NULL; if (status) goto out_put_session; if (session->se_target_maxslots < session->se_fchannel.maxreqs && slot->sl_generation == session->se_slot_gen && seq->maxslots <= session->se_target_maxslots) /* Client acknowledged our reduce maxreqs */ free_session_slots(session, session->se_target_maxslots); buflen = (seq->cachethis) ? session->se_fchannel.maxresp_cached : session->se_fchannel.maxresp_sz; status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; svc_reserve_auth(rqstp, buflen); status = nfs_ok; /* Success! accept new slot seqid */ slot->sl_seqid = seq->seqid; slot->sl_flags &= ~NFSD4_SLOT_REUSED; slot->sl_flags |= NFSD4_SLOT_INUSE; slot->sl_generation = session->se_slot_gen; if (seq->cachethis) slot->sl_flags |= NFSD4_SLOT_CACHETHIS; else slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS; cstate->slot = slot; cstate->session = session; cstate->clp = clp; /* * If the client ever uses the highest available slot, * gently try to allocate another 20%. This allows * fairly quick growth without grossly over-shooting what * the client might use. */ if (seq->slotid == session->se_fchannel.maxreqs - 1 && session->se_target_maxslots >= session->se_fchannel.maxreqs && session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) { int s = session->se_fchannel.maxreqs; int cnt = DIV_ROUND_UP(s, 5); void *prev_slot; do { /* * GFP_NOWAIT both allows allocation under a * spinlock, and only succeeds if there is * plenty of memory. */ slot = nfsd4_alloc_slot(&session->se_fchannel, s, GFP_NOWAIT); prev_slot = xa_load(&session->se_slots, s); if (xa_is_value(prev_slot) && slot) { slot->sl_seqid = xa_to_value(prev_slot); slot->sl_flags |= NFSD4_SLOT_REUSED; } if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot, GFP_NOWAIT))) { s += 1; session->se_fchannel.maxreqs = s; atomic_add(s - session->se_target_maxslots, &nfsd_total_target_slots); session->se_target_maxslots = s; } else { kfree(slot); slot = NULL; } } while (slot && --cnt > 0); } out: seq->maxslots = max(session->se_target_maxslots, seq->maxslots); seq->target_maxslots = session->se_target_maxslots; switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; break; case NFSD4_CB_FAULT: seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; break; default: seq->status_flags = 0; } if (!list_empty(&clp->cl_revoked)) seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; if (atomic_read(&clp->cl_admin_revoked)) seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; trace_nfsd_seq4_status(rqstp, seq); out_no_session: if (conn) free_conn(conn); spin_unlock(&nn->client_lock); return status; out_put_session: nfsd4_put_session_locked(session); goto out_no_session; } void nfsd4_sequence_done(struct nfsd4_compoundres *resp) { struct nfsd4_compound_state *cs = &resp->cstate; if (nfsd4_has_session(cs)) { if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Drop session reference that was taken in nfsd4_sequence() */ nfsd4_put_session(cs->session); } else if (cs->clp) put_client_renew(cs->clp); } __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; struct nfs4_client *conf, *unconf; struct nfs4_client *clp = NULL; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); WARN_ON_ONCE(conf && unconf); if (conf) { if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } status = mark_client_expired_locked(conf); if (status) goto out; clp = conf; } else if (unconf) clp = unconf; else { status = nfserr_stale_clientid; goto out; } if (!nfsd4_mach_creds_match(clp, rqstp)) { clp = NULL; status = nfserr_wrong_cred; goto out; } trace_nfsd_clid_destroyed(&clp->cl_clientid); unhash_client_locked(clp); out: spin_unlock(&nn->client_lock); if (clp) expire_client(clp); return status; } __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; struct nfs4_client *clp = cstate->clp; __be32 status = 0; if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) return nfserr_nofilehandle; /* * We don't take advantage of the rca_one_fs case. * That's OK, it's optional, we can safely ignore it. */ return nfs_ok; } status = nfserr_complete_already; if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) goto out; status = nfserr_stale_clientid; if (is_client_expired(clp)) /* * The following error isn't really legal. * But we only get here if the client just explicitly * destroyed the client. Surely it no longer cares what * error it gets back on an operation for the dead * client. */ goto out; status = nfs_ok; trace_nfsd_clid_reclaim_complete(&clp->cl_clientid); nfsd4_client_record_create(clp); inc_reclaim_complete(clp); out: return status; } __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setclientid *setclid = &u->setclientid; struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; struct nfs4_client *conf, *new; struct nfs4_client *unconf = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); new = create_client(clname, rqstp, &clverifier); if (new == NULL) return nfserr_jukebox; spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf && client_has_state(conf)) { status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); if (conf) { if (same_verf(&conf->cl_verifier, &clverifier)) { copy_clid(new, conf); gen_confirm(new, nn); } else trace_nfsd_clid_verf_mismatch(conf, rqstp, &clverifier); } else trace_nfsd_clid_fresh(new); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); new = NULL; status = nfs_ok; out: spin_unlock(&nn->client_lock); if (new) free_client(new); if (unconf) { trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); expire_client(unconf); } return status; } __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setclientid_confirm *setclientid_confirm = &u->setclientid_confirm; struct nfs4_client *conf, *unconf; struct nfs4_client *old = NULL; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; spin_lock(&nn->client_lock); conf = find_confirmed_client(clid, false, nn); unconf = find_unconfirmed_client(clid, false, nn); /* * We try hard to give out unique clientid's, so if we get an * attempt to confirm the same clientid with a different cred, * the client may be buggy; this should never happen. * * Nevertheless, RFC 7530 recommends INUSE for this case: */ status = nfserr_clid_inuse; if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out; } if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) { trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { if (conf && same_verf(&confirm, &conf->cl_confirm)) { status = nfs_ok; } else status = nfserr_stale_clientid; goto out; } status = nfs_ok; if (conf) { if (get_client_locked(conf) == nfs_ok) { old = unconf; unhash_client_locked(old); nfsd4_change_callback(conf, &unconf->cl_cb_conn); } else { conf = NULL; } } if (!conf) { old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, &old->cl_cred)) { old = NULL; goto out; } status = mark_client_expired_locked(old); if (status) { old = NULL; goto out; } trace_nfsd_clid_replaced(&old->cl_clientid); } status = get_client_locked(unconf); if (status != nfs_ok) { old = NULL; goto out; } move_to_confirmed(unconf); conf = unconf; } spin_unlock(&nn->client_lock); if (conf == unconf) fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); nfsd4_probe_callback(conf); spin_lock(&nn->client_lock); put_client_renew_locked(conf); out: spin_unlock(&nn->client_lock); if (old) expire_client(old); return status; } static struct nfs4_file *nfsd4_alloc_file(void) { return kmem_cache_alloc(file_slab, GFP_KERNEL); } /* OPEN Share state helper functions */ static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) { refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); INIT_LIST_HEAD(&fp->fi_clnt_odstate); fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle); fp->fi_deleg_file = NULL; fp->fi_rdeleg_file = NULL; fp->fi_had_conflict = false; fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); fp->fi_aliased = false; fp->fi_inode = d_inode(fh->fh_dentry); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif } void nfsd4_free_slabs(void) { kmem_cache_destroy(client_slab); kmem_cache_destroy(openowner_slab); kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(file_slab); kmem_cache_destroy(stateid_slab); kmem_cache_destroy(deleg_slab); kmem_cache_destroy(odstate_slab); } int nfsd4_init_slabs(void) { client_slab = KMEM_CACHE(nfs4_client, 0); if (client_slab == NULL) goto out; openowner_slab = KMEM_CACHE(nfs4_openowner, 0); if (openowner_slab == NULL) goto out_free_client_slab; lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0); if (lockowner_slab == NULL) goto out_free_openowner_slab; file_slab = KMEM_CACHE(nfs4_file, 0); if (file_slab == NULL) goto out_free_lockowner_slab; stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0); if (stateid_slab == NULL) goto out_free_file_slab; deleg_slab = KMEM_CACHE(nfs4_delegation, 0); if (deleg_slab == NULL) goto out_free_stateid_slab; odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0); if (odstate_slab == NULL) goto out_free_deleg_slab; return 0; out_free_deleg_slab: kmem_cache_destroy(deleg_slab); out_free_stateid_slab: kmem_cache_destroy(stateid_slab); out_free_file_slab: kmem_cache_destroy(file_slab); out_free_lockowner_slab: kmem_cache_destroy(lockowner_slab); out_free_openowner_slab: kmem_cache_destroy(openowner_slab); out_free_client_slab: kmem_cache_destroy(client_slab); out: return -ENOMEM; } static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { struct nfsd_net *nn = shrink->private_data; long count; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) count = atomic_long_read(&num_delegations); if (count) queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } static unsigned long nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { return SHRINK_STOP; } void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; u64 max_clients; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; nn->track_reclaim_completes = false; nn->clverifier_counter = get_random_u32(); nn->clientid_base = get_random_u32(); nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->nfs4_client_count, 0); si_meminfo(&si); max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); max_clients *= NFS4_CLIENTS_PER_GB; nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); } enum rp_lock { RP_UNLOCKED, RP_LOCKED, RP_UNHASHED, }; static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; rp->rp_locked = RP_UNLOCKED; } static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so) { if (!nfsd4_has_session(cstate)) { wait_var_event(&so->so_replay.rp_locked, cmpxchg(&so->so_replay.rp_locked, RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); if (so->so_replay.rp_locked == RP_UNHASHED) return -EAGAIN; cstate->replay_owner = nfs4_get_stateowner(so); } return 0; } void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) { struct nfs4_stateowner *so = cstate->replay_owner; if (so != NULL) { cstate->replay_owner = NULL; store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED); nfs4_put_stateowner(so); } } static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) { struct nfs4_stateowner *sop; sop = kmem_cache_alloc(slab, GFP_KERNEL); if (!sop) return NULL; xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL); if (!sop->so_owner.data) { kmem_cache_free(slab, sop); return NULL; } INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; init_nfs4_replay(&sop->so_replay); atomic_set(&sop->so_count, 1); return sop; } static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { lockdep_assert_held(&clp->cl_lock); list_add(&oo->oo_owner.so_strhash, &clp->cl_ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } static void nfs4_unhash_openowner(struct nfs4_stateowner *so) { unhash_openowner_locked(openowner(so)); } static void nfs4_free_openowner(struct nfs4_stateowner *so) { struct nfs4_openowner *oo = openowner(so); kmem_cache_free(openowner_slab, oo); } static const struct nfs4_stateowner_operations openowner_ops = { .so_unhash = nfs4_unhash_openowner, .so_free = nfs4_free_openowner, }; static struct nfs4_ol_stateid * nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_ol_stateid *local, *ret = NULL; struct nfs4_openowner *oo = open->op_openowner; lockdep_assert_held(&fp->fi_lock); list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; if (local->st_stateowner != &oo->oo_owner) continue; if (local->st_stid.sc_type == SC_TYPE_OPEN && !local->st_stid.sc_status) { ret = local; refcount_inc(&ret->st_stid.sc_count); break; } } return ret; } static void nfsd4_drop_revoked_stid(struct nfs4_stid *s) __releases(&s->sc_client->cl_lock) { struct nfs4_client *cl = s->sc_client; LIST_HEAD(reaplist); struct nfs4_ol_stateid *stp; struct nfs4_delegation *dp; bool unhashed; switch (s->sc_type) { case SC_TYPE_OPEN: stp = openlockstateid(s); if (unhash_open_stateid(stp, &reaplist)) put_ol_stateid_locked(stp, &reaplist); spin_unlock(&cl->cl_lock); free_ol_stateid_reaplist(&reaplist); break; case SC_TYPE_LOCK: stp = openlockstateid(s); unhashed = unhash_lock_stateid(stp); spin_unlock(&cl->cl_lock); if (unhashed) nfs4_put_stid(s); break; case SC_TYPE_DELEG: dp = delegstateid(s); list_del_init(&dp->dl_recall_lru); spin_unlock(&cl->cl_lock); nfs4_put_stid(s); break; default: spin_unlock(&cl->cl_lock); } } static void nfsd40_drop_revoked_stid(struct nfs4_client *cl, stateid_t *stid) { /* NFSv4.0 has no way for the client to tell the server * that it can forget an admin-revoked stateid. * So we keep it around until the first time that the * client uses it, and drop it the first time * nfserr_admin_revoked is returned. * For v4.1 and later we wait until explicitly told * to free the stateid. */ if (cl->cl_minorversion == 0) { struct nfs4_stid *st; spin_lock(&cl->cl_lock); st = find_stateid_locked(cl, stid); if (st) nfsd4_drop_revoked_stid(st); else spin_unlock(&cl->cl_lock); } } static __be32 nfsd4_verify_open_stid(struct nfs4_stid *s) { __be32 ret = nfs_ok; if (s->sc_status & SC_STATUS_ADMIN_REVOKED) ret = nfserr_admin_revoked; else if (s->sc_status & SC_STATUS_REVOKED) ret = nfserr_deleg_revoked; else if (s->sc_status & SC_STATUS_CLOSED) ret = nfserr_bad_stateid; return ret; } /* Lock the stateid st_mutex, and deal with races with CLOSE */ static __be32 nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp) { __be32 ret; mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); ret = nfsd4_verify_open_stid(&stp->st_stid); if (ret == nfserr_admin_revoked) nfsd40_drop_revoked_stid(stp->st_stid.sc_client, &stp->st_stid.sc_stateid); if (ret != nfs_ok) mutex_unlock(&stp->st_mutex); return ret; } static struct nfs4_ol_stateid * nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_ol_stateid *stp; for (;;) { spin_lock(&fp->fi_lock); stp = nfsd4_find_existing_open(fp, open); spin_unlock(&fp->fi_lock); if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok) break; nfs4_put_stid(&stp->st_stid); } return stp; } static struct nfs4_openowner * find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; struct nfs4_openowner *oo, *new = NULL; retry: spin_lock(&clp->cl_lock); oo = find_openstateowner_str(strhashval, open, clp); if (!oo && new) { hash_openowner(new, clp, strhashval); spin_unlock(&clp->cl_lock); return new; } spin_unlock(&clp->cl_lock); if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ release_openowner(oo); oo = NULL; } if (oo) { if (new) nfs4_free_stateowner(&new->oo_owner); return oo; } new = alloc_stateowner(openowner_slab, &open->op_owner, clp); if (!new) return NULL; new->oo_owner.so_ops = &openowner_ops; new->oo_owner.so_is_open_owner = 1; new->oo_owner.so_seqid = open->op_seqid; new->oo_flags = 0; if (nfsd4_has_session(cstate)) new->oo_flags |= NFS4_OO_CONFIRMED; new->oo_time = 0; new->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&new->oo_close_lru); goto retry; } static struct nfs4_ol_stateid * init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; struct nfs4_ol_stateid *stp; stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */ mutex_init(&stp->st_mutex); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); if (nfs4_openowner_unhashed(oo)) { mutex_unlock(&stp->st_mutex); stp = NULL; goto out_unlock; } retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; open->op_stp = NULL; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = SC_TYPE_OPEN; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { /* Handle races with CLOSE */ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { nfs4_put_stid(&retstp->st_stid); goto retry; } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); stp = retstp; } return stp; } /* * In the 4.0 case we need to keep the owners around a little while to handle * CLOSE replay. We still do need to release any file access that is held by * them before returning however. */ static void move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) { struct nfs4_ol_stateid *last; struct nfs4_openowner *oo = openowner(s->st_stateowner); struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net, nfsd_net_id); dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); /* * We know that we hold one reference via nfsd4_close, and another * "persistent" reference for the client. If the refcount is higher * than 2, then there are still calls in progress that are using this * stateid. We can't put the sc_file reference until they are finished. * Wait for the refcount to drop to 2. Since it has been unhashed, * there should be no danger of the refcount going back up again at * this point. * Some threads with a reference might be waiting for rp_locked, * so tell them to stop waiting. */ store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); if (s->st_stid.sc_file) { put_nfs4_file(s->st_stid.sc_file); s->st_stid.sc_file = NULL; } spin_lock(&nn->client_lock); last = oo->oo_last_closed_stid; oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = ktime_get_boottime_seconds(); spin_unlock(&nn->client_lock); if (last) nfs4_put_stid(&last->st_stid); } static noinline_for_stack struct nfs4_file * nfsd4_file_hash_lookup(const struct svc_fh *fhp) { struct inode *inode = d_inode(fhp->fh_dentry); struct rhlist_head *tmp, *list; struct nfs4_file *fi; rcu_read_lock(); list = rhltable_lookup(&nfs4_file_rhltable, &inode, nfs4_file_rhash_params); rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) { rcu_read_unlock(); return fi; } } } rcu_read_unlock(); return NULL; } /* * On hash insertion, identify entries with the same inode but * distinct filehandles. They will all be on the list returned * by rhltable_lookup(). * * inode->i_lock prevents racing insertions from adding an entry * for the same inode/fhp pair twice. */ static noinline_for_stack struct nfs4_file * nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { struct inode *inode = d_inode(fhp->fh_dentry); struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; struct nfs4_file *fi; int err; rcu_read_lock(); spin_lock(&inode->i_lock); list = rhltable_lookup(&nfs4_file_rhltable, &inode, nfs4_file_rhash_params); rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) ret = fi; } else fi->fi_aliased = alias_found = true; } if (ret) goto out_unlock; nfsd4_file_init(fhp, new); err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, nfs4_file_rhash_params); if (err) goto out_unlock; new->fi_aliased = alias_found; ret = new; out_unlock: spin_unlock(&inode->i_lock); rcu_read_unlock(); return ret; } static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, nfs4_file_rhash_params); } /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid */ static __be32 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct nfs4_file *fp; __be32 ret = nfs_ok; fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) ret = nfserr_locked; spin_unlock(&fp->fi_lock); put_nfs4_file(fp); return ret; } static bool nfsd4_deleg_present(const struct inode *inode) { struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } /** * nfsd_wait_for_delegreturn - wait for delegations to be returned * @rqstp: the RPC transaction being executed * @inode: in-core inode of the file being waited for * * The timeout prevents deadlock if all nfsd threads happen to be * tied up waiting for returning delegations. * * Return values: * %true: delegation was returned * %false: timed out waiting for delegreturn */ bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode) { long __maybe_unused timeo; timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode), NFSD_DELEGRETURN_TIMEOUT); trace_nfsd_delegret_wakeup(rqstp, inode, timeo); return timeo > 0; } static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, nfsd_net_id); block_delegations(&dp->dl_stid.sc_file->fi_fhandle); /* * We can't do this in nfsd_break_deleg_cb because it is * already holding inode->i_lock. * * If the dl_time != 0, then we know that it has already been * queued for a lease break. Don't queue it again. */ spin_lock(&state_lock); if (delegation_hashed(dp) && dp->dl_time == 0) { dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } spin_unlock(&state_lock); } static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_delegation *dp = cb_to_delegation(cb); trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); if (dp->dl_stid.sc_status) /* CLOSED or REVOKED */ return 1; switch (task->tk_status) { case 0: return 1; case -NFS4ERR_DELAY: rpc_delay(task, 2 * HZ); return 0; case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: /* * Race: client probably got cb_recall before open reply * granting delegation. */ if (dp->dl_retries--) { rpc_delay(task, 2 * HZ); return 0; } fallthrough; default: return 1; } } static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); nfs4_put_stid(&dp->dl_stid); } static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { .prepare = nfsd4_cb_recall_prepare, .done = nfsd4_cb_recall_done, .release = nfsd4_cb_recall_release, .opcode = OP_CB_RECALL, }; static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { bool queued; if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) return; /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the * flc_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); queued = nfsd4_run_cb(&dp->dl_recall); WARN_ON_ONCE(!queued); if (!queued) refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ static bool nfsd_break_deleg_cb(struct file_lease *fl) { struct nfs4_delegation *dp = (struct nfs4_delegation *) fl->c.flc_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn; trace_nfsd_cb_recall(&dp->dl_stid); dp->dl_recalled = true; atomic_inc(&clp->cl_delegs_in_recall); if (try_to_expire_client(clp)) { nn = net_generic(clp->net, nfsd_net_id); mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned * in time: */ fl->fl_break_time = 0; fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); return false; } /** * nfsd_breaker_owns_lease - Check if lease conflict was resolved * @fl: Lock state to check * * Return values: * %true: Lease conflict was resolved * %false: Lease conflict was not resolved. */ static bool nfsd_breaker_owns_lease(struct file_lease *fl) { struct nfs4_delegation *dl = fl->c.flc_owner; struct svc_rqst *rqst; struct nfs4_client *clp; rqst = nfsd_current_rqst(); if (!nfsd_v4client(rqst)) return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; } static int nfsd_change_deleg_cb(struct file_lease *onlist, int arg, struct list_head *dispose) { struct nfs4_delegation *dp = (struct nfs4_delegation *) onlist->c.flc_owner; struct nfs4_client *clp = dp->dl_stid.sc_client; if (arg & F_UNLCK) { if (dp->dl_recalled) atomic_dec(&clp->cl_delegs_in_recall); return lease_modify(onlist, arg, dispose); } else return -EAGAIN; } static const struct lease_manager_operations nfsd_lease_mng_ops = { .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, }; static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) { if (nfsd4_has_session(cstate)) return nfs_ok; if (seqid == so->so_seqid - 1) return nfserr_replay_me; if (seqid == so->so_seqid) return nfs_ok; return nfserr_bad_seqid; } static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *found; spin_lock(&nn->client_lock); found = find_confirmed_client(clid, sessions, nn); if (found) atomic_inc(&found->cl_rpc_users); spin_unlock(&nn->client_lock); return found; } static __be32 set_client(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn) { if (cstate->clp) { if (!same_clid(&cstate->clp->cl_clientid, clid)) return nfserr_stale_clientid; return nfs_ok; } if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; /* * We're in the 4.0 case (otherwise the SEQUENCE op would have * set cstate->clp), so session = false: */ cstate->clp = lookup_clientid(clid, false, nn); if (!cstate->clp) return nfserr_expired; return nfs_ok; } __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct nfsd_net *nn) { clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; unsigned int strhashval; struct nfs4_openowner *oo = NULL; __be32 status; /* * In case we need it later, after we've already created the * file and don't want to risk a further failure: */ open->op_file = nfsd4_alloc_file(); if (open->op_file == NULL) return nfserr_jukebox; status = set_client(clientid, cstate, nn); if (status) return status; clp = cstate->clp; strhashval = ownerstr_hashval(&open->op_owner); retry: oo = find_or_alloc_open_stateowner(strhashval, open, cstate); open->op_openowner = oo; if (!oo) return nfserr_jukebox; if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) { nfs4_put_stateowner(&oo->oo_owner); goto retry; } status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; if (nfsd4_has_session(cstate) && (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) { open->op_odstate = alloc_clnt_odstate(clp); if (!open->op_odstate) return nfserr_jukebox; } return nfs_ok; } static inline __be32 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) { if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type)) return nfserr_openmode; else return nfs_ok; } static int share_access_to_flags(u32 share_access) { return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; } static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) { struct nfs4_stid *ret; ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED); if (!ret) return NULL; return delegstateid(ret); } static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) { return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR || open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH; } static __be32 nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; __be32 status = nfserr_bad_stateid; struct nfs4_delegation *deleg; deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); if (deleg == NULL) goto out; if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) { nfs4_put_stid(&deleg->dl_stid); status = nfserr_admin_revoked; goto out; } if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) { nfs4_put_stid(&deleg->dl_stid); nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid); status = nfserr_deleg_revoked; goto out; } flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(deleg, flags); if (status) { nfs4_put_stid(&deleg->dl_stid); goto out; } *dp = deleg; out: if (!nfsd4_is_deleg_cur(open)) return nfs_ok; if (status) return status; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; return nfs_ok; } static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; if (nfs4_access & NFS4_SHARE_ACCESS_READ) flags |= NFSD_MAY_READ; if (nfs4_access & NFS4_SHARE_ACCESS_WRITE) flags |= NFSD_MAY_WRITE; return flags; } static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) { struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, }; struct nfsd_attrs attrs = { .na_iattr = &iattr, }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; return nfsd_setattr(rqstp, fh, &attrs, NULL); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open, bool new_stp) { struct nfsd_file *nf = NULL; __be32 status; int oflag = nfs4_access_to_omode(open->op_share_access); int access = nfs4_access_to_access(open->op_share_access); unsigned char old_access_bmap, old_deny_bmap; spin_lock(&fp->fi_lock); /* * Are we trying to set a deny mode that would conflict with * current access? */ status = nfs4_file_check_deny(fp, open->op_share_deny); if (status != nfs_ok) { if (status != nfserr_share_denied) { spin_unlock(&fp->fi_lock); goto out; } if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, stp, open->op_share_deny, false)) status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } /* set access to the file */ status = nfs4_file_get_access(fp, open->op_share_access); if (status != nfs_ok) { if (status != nfserr_share_denied) { spin_unlock(&fp->fi_lock); goto out; } if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, stp, open->op_share_access, true)) status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } /* Set access bits in stateid */ old_access_bmap = stp->st_access_bmap; set_access(open->op_share_access, stp); /* Set new deny mask */ old_deny_bmap = stp->st_deny_bmap; set_deny(open->op_share_deny, stp); fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); status = nfsd_file_acquire_opened(rqstp, cur_fh, access, open->op_filp, &nf); if (status != nfs_ok) goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { fp->fi_fds[oflag] = nf; nf = NULL; } } spin_unlock(&fp->fi_lock); if (nf) nfsd_file_put(nf); status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode, access)); if (status) goto out_put_access; status = nfsd4_truncate(rqstp, cur_fh, open); if (status) goto out_put_access; out: return status; out_put_access: stp->st_access_bmap = old_access_bmap; nfs4_file_put_access(fp, open->op_share_access); reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp); goto out; } static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { __be32 status; unsigned char old_deny_bmap = stp->st_deny_bmap; if (!test_access(open->op_share_access, stp)) return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false); /* test and set deny mode */ spin_lock(&fp->fi_lock); status = nfs4_file_check_deny(fp, open->op_share_deny); switch (status) { case nfs_ok: set_deny(open->op_share_deny, stp); fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); break; case nfserr_share_denied: if (nfs4_resolve_deny_conflicts_locked(fp, false, stp, open->op_share_deny, false)) status = nfserr_jukebox; break; } spin_unlock(&fp->fi_lock); if (status != nfs_ok) return status; status = nfsd4_truncate(rqstp, cur_fh, open); if (status != nfs_ok) reset_union_bmap_deny(old_deny_bmap, stp); return status; } /* Should we give out recallable state?: */ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) { if (clp->cl_cb_state == NFSD4_CB_UP) return true; /* * In the sessions case, since we don't have to establish a * separate connection for callbacks, we assume it's OK * until we hear otherwise: */ return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp) { struct file_lease *fl; fl = locks_alloc_lease(); if (!fl) return NULL; fl->fl_lmops = &nfsd_lease_mng_ops; fl->c.flc_flags = FL_DELEG; fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK; fl->c.flc_owner = (fl_owner_t)dp; fl->c.flc_pid = current->tgid; fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; return fl; } static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, struct nfs4_file *fp) { struct nfs4_ol_stateid *st; struct file *f = fp->fi_deleg_file->nf_file; struct inode *ino = file_inode(f); int writes; writes = atomic_read(&ino->i_writecount); if (!writes) return 0; /* * There could be multiple filehandles (hence multiple * nfs4_files) referencing this file, but that's not too * common; let's just give up in that case rather than * trying to go look up all the clients using that other * nfs4_file as well: */ if (fp->fi_aliased) return -EAGAIN; /* * If there's a close in progress, make sure that we see it * clear any fi_fds[] entries before we see it decrement * i_writecount: */ smp_mb__after_atomic(); if (fp->fi_fds[O_WRONLY]) writes--; if (fp->fi_fds[O_RDWR]) writes--; if (writes > 0) return -EAGAIN; /* There may be non-NFSv4 writers */ /* * It's possible there are non-NFSv4 write opens in progress, * but if they haven't incremented i_writecount yet then they * also haven't called break lease yet; so, they'll break this * lease soon enough. So, all that's left to check for is NFSv4 * opens: */ spin_lock(&fp->fi_lock); list_for_each_entry(st, &fp->fi_stateids, st_perfile) { if (st->st_openstp == NULL /* it's an open */ && access_permit_write(st) && st->st_stid.sc_client != clp) { spin_unlock(&fp->fi_lock); return -EAGAIN; } } spin_unlock(&fp->fi_lock); /* * There's a small chance that we could be racing with another * NFSv4 open. However, any open that hasn't added itself to * the fi_stateids list also hasn't called break_lease yet; so, * they'll break this lease soon enough. */ return 0; } /* * It's possible that between opening the dentry and setting the delegation, * that it has been renamed or unlinked. Redo the lookup to verify that this * hasn't happened. */ static int nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, struct svc_fh *parent) { struct svc_export *exp; struct dentry *child; __be32 err; err = nfsd_lookup_dentry(open->op_rqstp, parent, open->op_fname, open->op_fnamelen, &exp, &child); if (err) return -EAGAIN; exp_put(exp); dput(child); if (child != file_dentry(fp->fi_deleg_file->nf_file)) return -EAGAIN; return 0; } /* * We avoid breaking delegations held by a client due to its own activity, but * clearing setuid/setgid bits on a write is an implicit activity and the client * may not notice and continue using the old mode. Avoid giving out a delegation * on setuid/setgid files when the client is requesting an open for write. */ static int nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) { struct inode *inode = file_inode(nf->nf_file); if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) && (inode->i_mode & (S_ISUID|S_ISGID))) return -EAGAIN; return 0; } #ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) { return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; } #else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */ static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) { return false; } #endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */ static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) { bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf = NULL; struct file_lease *fl; int status = 0; u32 dl_type; /* * The fi_had_conflict and nfs_get_existing_delegation checks * here are just optimizations; we'll need to recheck them at * the end: */ if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); /* * Try for a write delegation first. RFC8881 section 10.4 says: * * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, * on its own, all opens." * * Furthermore, section 9.1.2 says: * * "In the case of READ, the server may perform the corresponding * check on the access mode, or it may choose to allow READ for * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE * implementation may unavoidably do reads (e.g., due to buffer * cache constraints)." * * We choose to offer a write delegation for OPEN with the * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients. */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { nf = find_writeable_file(fp); dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; } /* * If the file is being opened O_RDONLY or we couldn't get a O_RDWR * file for some reason, then try for a read delegation instead. */ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { nf = find_readable_file(fp); dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ; } if (!nf) return ERR_PTR(-EAGAIN); /* * File delegations and associated locks cannot be recovered if the * export is from an NFS proxy server. */ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { nfsd_file_put(nf); return ERR_PTR(-EOPNOTSUPP); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) status = -EAGAIN; else if (nfsd4_verify_setuid_write(open, nf)) status = -EAGAIN; else if (!fp->fi_deleg_file) { fp->fi_deleg_file = nf; /* increment early to prevent fi_deleg_file from being * cleared */ fp->fi_delegees = 1; nf = NULL; } else fp->fi_delegees++; spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); if (nf) nfsd_file_put(nf); if (status) return ERR_PTR(status); status = -ENOMEM; dp = alloc_init_deleg(clp, fp, odstate, dl_type); if (!dp) goto out_delegees; fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_clnt_odstate; status = kernel_setlease(fp->fi_deleg_file->nf_file, fl->c.flc_type, &fl, NULL); if (fl) locks_free_lease(fl); if (status) goto out_clnt_odstate; if (parent) { status = nfsd4_verify_deleg_dentry(open, fp, parent); if (status) goto out_unlock; } status = nfsd4_check_conflicting_opens(clp, fp); if (status) goto out_unlock; /* * Now that the deleg is set, check again to ensure that nothing * raced in and changed the mode while we weren't looking. */ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); if (status) goto out_unlock; status = -EAGAIN; if (fp->fi_had_conflict) goto out_unlock; spin_lock(&state_lock); spin_lock(&clp->cl_lock); spin_lock(&fp->fi_lock); status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); spin_unlock(&state_lock); if (status) goto out_unlock; return dp; out_unlock: kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp); out_clnt_odstate: put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_stid(&dp->dl_stid); out_delegees: put_deleg_file(fp); return ERR_PTR(status); } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; if (status == -EAGAIN) open->op_why_no_deleg = WND4_CONTENTION; else { open->op_why_no_deleg = WND4_RESOURCE; switch (open->op_deleg_want) { case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: break; case OPEN4_SHARE_ACCESS_WANT_CANCEL: open->op_why_no_deleg = WND4_CANCELLED; break; case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: WARN_ON_ONCE(1); } } } static bool nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, struct kstat *stat) { struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file); struct path path; int rc; if (!nf) return false; path.mnt = currentfh->fh_export->ex_path.mnt; path.dentry = file_dentry(nf->nf_file); rc = vfs_getattr(&path, stat, STATX_MODE | STATX_SIZE | STATX_ATIME | STATX_MTIME | STATX_CTIME | STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT); nfsd_file_put(nf); return rc == 0; } /* * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN * with NFS4_SHARE_ACCESS_WRITE by allocating separate nfsd_file and * struct file to be used for read with delegation stateid. * */ static bool nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open, struct svc_fh *fh, struct nfs4_ol_stateid *stp) { struct nfs4_file *fp; struct nfsd_file *nf = NULL; if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_WRITE) { if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ, NULL, &nf)) return (false); fp = stp->st_stid.sc_file; spin_lock(&fp->fi_lock); __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ); fp = stp->st_stid.sc_file; fp->fi_fds[O_RDONLY] = nf; fp->fi_rdeleg_file = nf; spin_unlock(&fp->fi_lock); } return true; } /* * The Linux NFS server does not offer write delegations to NFSv4.0 * clients in order to avoid conflicts between write delegations and * GETATTRs requesting CHANGE or SIZE attributes. * * With NFSv4.1 and later minorversions, the SEQUENCE operation that * begins each COMPOUND contains a client ID. Delegation recall can * be avoided when the server recognizes the client sending a * GETATTR also holds write delegation it conflicts with. * * However, the NFSv4.0 protocol does not enable a server to * determine that a GETATTR originated from the client holding the * conflicting delegation versus coming from some other client. Per * RFC 7530 Section 16.7.5, the server must recall or send a * CB_GETATTR even when the GETATTR originates from the client that * holds the conflicting delegation. * * An NFSv4.0 client can trigger a pathological situation if it * always sends a DELEGRETURN preceded by a conflicting GETATTR in * the same COMPOUND. COMPOUND execution will always stop at the * GETATTR and the DELEGRETURN will never get executed. The server * eventually revokes the delegation, which can result in loss of * open or lock state. */ static void nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh, struct svc_fh *fh) { struct nfs4_openowner *oo = openowner(stp->st_stateowner); bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; struct nfs4_delegation *dp; struct kstat stat; int status = 0; int cb_up; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = false; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = true; break; case NFS4_OPEN_CLAIM_NULL: parent = currentfh; fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's * had the chance to reclaim theirs, *and* until * NLM locks have all been reclaimed: */ if (locks_in_grace(clp->net)) goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE && !clp->cl_minorversion) goto out_no_deleg; break; default: goto out_no_deleg; } dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg; memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { struct file *f = dp->dl_stid.sc_file->fi_deleg_file->nf_file; if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) || !nfs4_delegation_stat(dp, currentfh, &stat)) { nfs4_put_stid(&dp->dl_stid); destroy_delegation(dp); goto out_no_deleg; } open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; dp->dl_cb_fattr.ncf_cur_fsize = stat.size; dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); dp->dl_atime = stat.atime; dp->dl_ctime = stat.ctime; dp->dl_mtime = stat.mtime; spin_lock(&f->f_lock); f->f_mode |= FMODE_NOCMTIME; spin_unlock(&f->f_lock); trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { open->op_delegate_type = deleg_ts && nfs4_delegation_stat(dp, currentfh, &stat) ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ; dp->dl_atime = stat.atime; trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); } nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: open->op_delegate_type = OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && open->op_delegate_type != OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_recall = true; } /* 4.1 client asking for a delegation? */ if (open->op_deleg_want) nfsd4_open_deleg_none_ext(open, status); return; } static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp) { if (deleg_is_write(dp->dl_type)) { if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; } } /* Otherwise the client must be confused wanting a delegation * it already has, therefore we don't return * OPEN_DELEGATE_NONE_EXT and reason. */ } /* Are we returning only a delegation stateid? */ static bool open_xor_delegation(struct nfsd4_open *open) { if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) return false; /* Did we actually get a delegation? */ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type)) return false; return true; } /** * nfsd4_process_open2 - finish open processing * @rqstp: the RPC transaction being executed * @current_fh: NFSv4 COMPOUND's current filehandle * @open: OPEN arguments * * If successful, (1) truncate the file if open->op_truncate was * set, (2) set open->op_stateid, (3) set open->op_delegation. * * Returns %nfs_ok on success; otherwise an nfs4stat value in * network byte order is returned. */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; bool new_stp = false; /* * Lookup file; if found, lookup stateid and check open request, * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ fp = nfsd4_file_hash_insert(open->op_file, current_fh); if (unlikely(!fp)) return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; if (dp && nfsd4_is_deleg_cur(open) && (dp->dl_stid.sc_file != fp)) { /* * RFC8881 section 8.2.4 mandates the server to return * NFS4ERR_BAD_STATEID if the selected table entry does * not match the current filehandle. However returning * NFS4ERR_BAD_STATEID in the OPEN can cause the client * to repeatedly retry the operation with the same * stateid, since the stateid itself is valid. To avoid * this situation NFSD returns NFS4ERR_INVAL instead. */ status = nfserr_inval; goto out; } stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; if (nfsd4_is_deleg_cur(open)) goto out; } if (!stp) { stp = init_open_stateid(fp, open); if (!stp) { status = nfserr_jukebox; goto out; } if (!open->op_stp) new_stp = true; } /* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. * * stp is already locked. */ if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); goto out; } } else { status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); if (status) { release_open_stateid(stp); mutex_unlock(&stp->st_mutex); goto out; } stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp, open->op_odstate); if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) { open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; goto nodeleg; } } /* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ nfs4_open_delegation(rqstp, open, stp, &resp->cstate.current_fh, current_fh); /* * If there is an existing open stateid, it must be updated and * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new * open stateid would have to be created. */ if (new_stp && open_xor_delegation(open)) { memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid)); open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID; release_open_stateid(stp); } nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp && open->op_deleg_want) nfsd4_deleg_xgrade_none_ext(open, dp); if (fp) put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; /* * To finish the open response, we just need to set the rflags. */ open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX; if (nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; if (dp) nfs4_put_stid(&dp->dl_stid); if (stp) nfs4_put_stid(&stp->st_stid); return status; } void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { if (open->op_openowner) nfs4_put_stateowner(&open->op_openowner->oo_owner); if (open->op_file) kmem_cache_free(file_slab, open->op_file); if (open->op_stp) nfs4_put_stid(&open->op_stp->st_stid); if (open->op_odstate) kmem_cache_free(odstate_slab, open->op_odstate); } __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { clientid_t *clid = &u->renew; struct nfs4_client *clp; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); trace_nfsd_clid_renew(clid); status = set_client(clid, cstate, nn); if (status) return status; clp = cstate->clp; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) return nfserr_cb_path_down; return nfs_ok; } void nfsd4_end_grace(struct nfsd_net *nn) { /* do nothing if grace period already ended */ if (nn->grace_ended) return; trace_nfsd_grace_complete(nn); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 * client will still be allowed to reclaim after it comes back up, * even if it hasn't yet had a chance to reclaim state this time. * */ nfsd4_record_grace_done(nn); /* * At this point, NFSv4 clients can still reclaim. But if the * server crashes, any that have not yet reclaimed will be out * of luck on the next boot. * * (NFSv4.1+ clients are considered to have reclaimed once they * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to * have reclaimed after their first OPEN.) */ locks_end_grace(&nn->nfsd4_manager); /* * At this point, and once lockd and/or any other containers * exit their grace period, further reclaims will fail and * regular locking can resume. */ } /* * If we've waited a lease period but there are still clients trying to * reclaim, wait a little longer to give them a chance to finish. */ static bool clients_still_reclaiming(struct nfsd_net *nn) { time64_t double_grace_period_end = nn->boot_time + 2 * nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == nn->reclaim_str_hashtbl_size) return false; if (!nn->somebody_reclaimed) return false; nn->somebody_reclaimed = false; /* * If we've given them *two* lease times to reclaim, and they're * still not done, give up: */ if (ktime_get_boottime_seconds() > double_grace_period_end) return false; return true; } struct laundry_time { time64_t cutoff; time64_t new_timeo; }; static bool state_expired(struct laundry_time *lt, time64_t last_refresh) { time64_t time_remaining; if (last_refresh < lt->cutoff) return true; time_remaining = last_refresh - lt->cutoff; lt->new_timeo = min(lt->new_timeo, time_remaining); return false; } #ifdef CONFIG_NFSD_V4_2_INTER_SSC void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) { spin_lock_init(&nn->nfsd_ssc_lock); INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); init_waitqueue_head(&nn->nfsd_ssc_waitq); } /* * This is called when nfsd is being shutdown, after all inter_ssc * cleanup were done, to destroy the ssc delayed unmount list. */ static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) { struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp; spin_lock(&nn->nfsd_ssc_lock); list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { list_del(&ni->nsui_list); spin_unlock(&nn->nfsd_ssc_lock); mntput(ni->nsui_vfsmount); kfree(ni); spin_lock(&nn->nfsd_ssc_lock); } spin_unlock(&nn->nfsd_ssc_lock); } static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) { bool do_wakeup = false; struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp; spin_lock(&nn->nfsd_ssc_lock); list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { if (time_after(jiffies, ni->nsui_expire)) { if (refcount_read(&ni->nsui_refcnt) > 1) continue; /* mark being unmount */ ni->nsui_busy = true; spin_unlock(&nn->nfsd_ssc_lock); mntput(ni->nsui_vfsmount); spin_lock(&nn->nfsd_ssc_lock); /* waiters need to start from begin of list */ list_del(&ni->nsui_list); kfree(ni); /* wakeup ssc_connect waiters */ do_wakeup = true; continue; } break; } if (do_wakeup) wake_up_all(&nn->nfsd_ssc_waitq); spin_unlock(&nn->nfsd_ssc_lock); } #endif /* Check if any lock belonging to this lockowner has any blockers */ static bool nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) { struct file_lock_context *ctx; struct nfs4_ol_stateid *stp; struct nfs4_file *nf; list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) return true; } return false; } static bool nfs4_anylock_blockers(struct nfs4_client *clp) { int i; struct nfs4_stateowner *so; struct nfs4_lockowner *lo; if (atomic_read(&clp->cl_delegs_in_recall)) return true; spin_lock(&clp->cl_lock); for (i = 0; i < OWNER_HASH_SIZE; i++) { list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i], so_strhash) { if (so->so_is_open_owner) continue; lo = lockowner(so); if (nfs4_lockowner_has_blockers(lo)) { spin_unlock(&clp->cl_lock); return true; } } } spin_unlock(&clp->cl_lock); return false; } static void nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, struct laundry_time *lt) { unsigned int maxreap, reapcnt = 0; struct list_head *pos, *next; struct nfs4_client *clp; maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state == NFSD4_EXPIRABLE) goto exp_client; if (!state_expired(lt, clp->cl_time)) break; if (!atomic_read(&clp->cl_rpc_users)) { if (clp->cl_state == NFSD4_ACTIVE) atomic_inc(&nn->nfsd_courtesy_clients); clp->cl_state = NFSD4_COURTESY; } if (!client_has_state(clp)) goto exp_client; if (!nfs4_anylock_blockers(clp)) if (reapcnt >= maxreap) continue; exp_client: if (!mark_client_expired_locked(clp)) { list_add(&clp->cl_lru, reaplist); reapcnt++; } } spin_unlock(&nn->client_lock); } static void nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist) { unsigned int maxreap = 0, reapcnt = 0; struct list_head *pos, *next; struct nfs4_client *clp; maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state == NFSD4_ACTIVE) break; if (reapcnt >= maxreap) break; if (!mark_client_expired_locked(clp)) { list_add(&clp->cl_lru, reaplist); reapcnt++; } } spin_unlock(&nn->client_lock); } static void nfs4_process_client_reaplist(struct list_head *reaplist) { struct list_head *pos, *next; struct nfs4_client *clp; list_for_each_safe(pos, next, reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); trace_nfsd_clid_purged(&clp->cl_clientid); list_del_init(&clp->cl_lru); expire_client(clp); } } static void nfs40_clean_admin_revoked(struct nfsd_net *nn, struct laundry_time *lt) { struct nfs4_client *clp; spin_lock(&nn->client_lock); if (nn->nfs40_last_revoke == 0 || nn->nfs40_last_revoke > lt->cutoff) { spin_unlock(&nn->client_lock); return; } nn->nfs40_last_revoke = 0; retry: list_for_each_entry(clp, &nn->client_lru, cl_lru) { unsigned long id, tmp; struct nfs4_stid *stid; if (atomic_read(&clp->cl_admin_revoked) == 0) continue; spin_lock(&clp->cl_lock); idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { refcount_inc(&stid->sc_count); spin_unlock(&nn->client_lock); /* this function drops ->cl_lock */ nfsd4_drop_revoked_stid(stid); nfs4_put_stid(stid); spin_lock(&nn->client_lock); goto retry; } spin_unlock(&clp->cl_lock); } spin_unlock(&nn->client_lock); } static time64_t nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; struct laundry_time lt = { .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, .new_timeo = nn->nfsd4_lease }; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; if (clients_still_reclaiming(nn)) { lt.new_timeo = 0; goto out; } nfsd4_end_grace(nn); spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && state_expired(&lt, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, &lt); nfs4_process_client_reaplist(&reaplist); nfs40_clean_admin_revoked(nn, &lt); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (!state_expired(&lt, dp->dl_time)) break; refcount_inc(&dp->dl_stid.sc_count); unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_first_entry(&reaplist, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } spin_lock(&nn->client_lock); while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); if (!state_expired(&lt, oo->oo_time)) break; list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; spin_unlock(&nn->client_lock); nfs4_put_stid(&stp->st_stid); spin_lock(&nn->client_lock); } spin_unlock(&nn->client_lock); /* * It's possible for a client to try and acquire an already held lock * that is being held for a long time, and then lose interest in it. * So, we clean out any un-revisited request after a lease period * under the assumption that the client is no longer interested. * * RFC5661, sec. 9.6 states that the client must not rely on getting * notifications and must continue to poll for locks, even when the * server supports them. Thus this shouldn't lead to clients blocking * indefinitely once the lock does become free. */ BUG_ON(!list_empty(&reaplist)); spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); if (!state_expired(&lt, nbl->nbl_time)) break; list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } #ifdef CONFIG_NFSD_V4_2_INTER_SSC /* service the server-to-server copy delayed unmount list */ nfsd4_ssc_expire_umount(nn); #endif if (atomic_long_read(&num_delegations) >= max_delegations) deleg_reaper(nn); out: return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } static void laundromat_main(struct work_struct *); static void laundromat_main(struct work_struct *laundry) { time64_t t; struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); t = nfs4_laundromat(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } static void courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } static void deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state != NFSD4_ACTIVE) continue; if (list_empty(&clp->cl_delegations)) continue; if (atomic_read(&clp->cl_delegs_in_recall)) continue; if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) continue; if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) continue; if (clp->cl_cb_state != NFSD4_CB_UP) continue; /* release in nfsd4_cb_recall_any_release */ kref_get(&clp->cl_nfsdfs.cl_ref); clp->cl_ra_time = ktime_get_boottime_seconds(); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } spin_unlock(&nn->client_lock); } static void nfsd4_state_shrinker_worker(struct work_struct *work) { struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); deleg_reaper(nn); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } static __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) { __be32 status = nfserr_openmode; /* For lock stateid's, we test the parent open, not the lock: */ if (stp->st_openstp) stp = stp->st_openstp; if ((flags & WR_STATE) && !access_permit_write(stp)) goto out; if ((flags & RD_STATE) && !access_permit_read(stp)) goto out; status = nfs_ok; out: return status; } static inline __be32 check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags) { if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; else if (opens_in_grace(net)) { /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; } else if (flags & WR_STATE) return nfs4_share_conflict(current_fh, NFS4_SHARE_DENY_WRITE); else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */ return nfs4_share_conflict(current_fh, NFS4_SHARE_DENY_READ); } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* * When sessions are used the stateid generation number is ignored * when it is zero. */ if (has_session && in->si_generation == 0) return nfs_ok; if (in->si_generation == ref->si_generation) return nfs_ok; /* If the client sends us a stateid from the future, it's buggy: */ if (nfsd4_stateid_generation_after(in, ref)) return nfserr_bad_stateid; /* * However, we could see a stateid from the past, even from a * non-buggy client. For example, if the client sends a lock * while some IO is outstanding, the lock may bump si_generation * while the IO is still in flight. The client could avoid that * situation by waiting for responses on all the IO requests, * but better performance may result in retrying IO that * receives an old_stateid error if requests are rarely * reordered in flight: */ return nfserr_old_stateid; } static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session) { __be32 ret; spin_lock(&s->sc_lock); ret = nfsd4_verify_open_stid(s); if (ret == nfs_ok) ret = check_stateid_generation(in, &s->sc_stateid, has_session); spin_unlock(&s->sc_lock); if (ret == nfserr_admin_revoked) nfsd40_drop_revoked_stid(s->sc_client, &s->sc_stateid); return ret; } static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) { if (ols->st_stateowner->so_is_open_owner && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) return nfserr_bad_stateid; return nfs_ok; } static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock; status = nfsd4_verify_open_stid(s); if (status) goto out_unlock; switch (s->sc_type) { case SC_TYPE_DELEG: status = nfs_ok; break; case SC_TYPE_OPEN: case SC_TYPE_LOCK: status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default: printk("unknown stateid type %x\n", s->sc_type); status = nfserr_bad_stateid; } out_unlock: spin_unlock(&cl->cl_lock); if (status == nfserr_admin_revoked) nfsd40_drop_revoked_stid(cl, stateid); return status; } __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned short typemask, unsigned short statusmask, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; struct nfs4_stid *stid; bool return_revoked = false; /* * only return revoked delegations if explicitly asked. * otherwise we report revoked or bad_stateid status. */ if (statusmask & SC_STATUS_REVOKED) return_revoked = true; if (typemask & SC_TYPE_DELEG) /* Always allow REVOKED for DELEG so we can * return the appropriate error. */ statusmask |= SC_STATUS_REVOKED; statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = set_client(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; return nfserr_stale_stateid; } if (status) return status; stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask); if (!stid) return nfserr_bad_stateid; if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) { nfs4_put_stid(stid); return nfserr_deleg_revoked; } if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { nfsd40_drop_revoked_stid(cstate->clp, stateid); nfs4_put_stid(stid); return nfserr_admin_revoked; } *s = stid; return nfs_ok; } static struct nfsd_file * nfs4_find_file(struct nfs4_stid *s, int flags) { struct nfsd_file *ret = NULL; if (!s || s->sc_status) return NULL; switch (s->sc_type) { case SC_TYPE_DELEG: case SC_TYPE_OPEN: case SC_TYPE_LOCK: if (flags & RD_STATE) ret = find_readable_file(s->sc_file); else ret = find_writeable_file(s->sc_file); } return ret; } static __be32 nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags) { __be32 status; status = nfsd4_check_openowner_confirmed(ols); if (status) return status; return nfs4_check_openmode(ols, flags); } static __be32 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, struct nfsd_file **nfp, int flags) { int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE; struct nfsd_file *nf; __be32 status; nf = nfs4_find_file(s, flags); if (nf) { status = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, fhp->fh_dentry, acc | NFSD_MAY_OWNER_OVERRIDE); if (status) { nfsd_file_put(nf); goto out; } } else { status = nfsd_file_acquire(rqstp, fhp, acc, &nf); if (status) return status; } *nfp = nf; out: return status; } static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.cs_stid.si_opaque.so_id); kfree(cps); } /* * A READ from an inter server to server COPY will have a * copy stateid. Look up the copy notify stateid from the * idr structure and take a reference on it. */ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_client *clp, struct nfs4_cpntf_state **cps) { copy_stateid_t *cps_t; struct nfs4_cpntf_state *state = NULL; if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) return nfserr_bad_stateid; spin_lock(&nn->s2s_cp_lock); cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id); if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) refcount_inc(&state->cp_stateid.cs_count); else _free_cpntf_state_locked(nn, state); } unlock: spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; if (!clp) *cps = state; return 0; } static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_stid **stid) { __be32 status; struct nfs4_cpntf_state *cps = NULL; struct nfs4_client *found; status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; cps->cpntf_time = ktime_get_boottime_seconds(); status = nfserr_expired; found = lookup_clientid(&cps->cp_p_clid, true, nn); if (!found) goto out; *stid = find_stateid_by_type(found, &cps->cp_p_stateid, SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, 0); if (*stid) status = nfs_ok; else status = nfserr_bad_stateid; put_client_renew(found); out: nfs4_put_cpntf_state(nn, cps); return status; } void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { spin_lock(&nn->s2s_cp_lock); _free_cpntf_state_locked(nn, cps); spin_unlock(&nn->s2s_cp_lock); } /** * nfs4_preprocess_stateid_op - find and prep stateid for an operation * @rqstp: incoming request from client * @cstate: current compound state * @fhp: filehandle associated with requested stateid * @stateid: stateid (provided by client) * @flags: flags describing type of operation to be done * @nfp: optional nfsd_file return pointer (may be NULL) * @cstid: optional returned nfs4_stid pointer (may be NULL) * * Given info from the client, look up a nfs4_stid for the operation. On * success, it returns a reference to the nfs4_stid and/or the nfsd_file * associated with it. */ __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, stateid_t *stateid, int flags, struct nfsd_file **nfp, struct nfs4_stid **cstid) { struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfs4_stid *s = NULL; __be32 status; if (nfp) *nfp = NULL; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { status = check_special_stateids(net, fhp, stateid, flags); goto done; } status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, 0, &s, nn); if (status == nfserr_bad_stateid) status = find_cpntf_state(nn, stateid, &s); if (status) return status; status = nfsd4_stid_check_stateid_generation(stateid, s, nfsd4_has_session(cstate)); if (status) goto out; switch (s->sc_type) { case SC_TYPE_DELEG: status = nfs4_check_delegmode(delegstateid(s), flags); break; case SC_TYPE_OPEN: case SC_TYPE_LOCK: status = nfs4_check_olstateid(openlockstateid(s), flags); break; } if (status) goto out; status = nfs4_check_fh(fhp, s); done: if (status == nfs_ok && nfp) status = nfs4_check_file(rqstp, fhp, s, nfp, flags); out: if (s) { if (!status && cstid) *cstid = s; else nfs4_put_stid(s); } return status; } /* * Test if the stateid is valid */ __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->clp; list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); return nfs_ok; } static __be32 nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) { struct nfs4_ol_stateid *stp = openlockstateid(s); __be32 ret; ret = nfsd4_lock_ol_stateid(stp); if (ret) goto out_put_stid; ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) goto out; ret = nfserr_locks_held; if (check_for_locks(stp->st_stid.sc_file, lockowner(stp->st_stateowner))) goto out; release_lock_stateid(stp); ret = nfs_ok; out: mutex_unlock(&stp->st_mutex); out_put_stid: nfs4_put_stid(s); return ret; } __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_free_stateid *free_stateid = &u->free_stateid; stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; struct nfs4_client *cl = cstate->clp; __be32 ret = nfserr_bad_stateid; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s || s->sc_status & SC_STATUS_CLOSED) goto out_unlock; if (s->sc_status & SC_STATUS_ADMIN_REVOKED) { nfsd4_drop_revoked_stid(s); ret = nfs_ok; goto out; } spin_lock(&s->sc_lock); switch (s->sc_type) { case SC_TYPE_DELEG: if (s->sc_status & SC_STATUS_REVOKED) { s->sc_status |= SC_STATUS_CLOSED; spin_unlock(&s->sc_lock); dp = delegstateid(s); if (s->sc_status & SC_STATUS_FREEABLE) list_del_init(&dp->dl_recall_lru); s->sc_status |= SC_STATUS_FREED; spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; goto out; } ret = nfserr_locks_held; break; case SC_TYPE_OPEN: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) break; ret = nfserr_locks_held; break; case SC_TYPE_LOCK: spin_unlock(&s->sc_lock); refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; } spin_unlock(&s->sc_lock); out_unlock: spin_unlock(&cl->cl_lock); out: return ret; } static inline int setlkflg (int type) { return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? RD_STATE : WR_STATE; } static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) { struct svc_fh *current_fh = &cstate->current_fh; struct nfs4_stateowner *sop = stp->st_stateowner; __be32 status; status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; status = nfsd4_lock_ol_stateid(stp); if (status != nfs_ok) return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) mutex_unlock(&stp->st_mutex); return status; } /** * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op * @cstate: compund state * @seqid: seqid (provided by client) * @stateid: stateid (provided by client) * @typemask: mask of allowable types for this operation * @statusmask: mask of allowed states: 0 or STID_CLOSED * @stpp: return pointer for the stateid found * @nn: net namespace for request * * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and * return it in @stpp. On a nfs_ok return, the returned stateid will * have its st_mutex locked. */ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, unsigned short typemask, unsigned short statusmask, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_stid *s; struct nfs4_ol_stateid *stp = NULL; trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; retry: status = nfsd4_lookup_stateid(cstate, stateid, typemask, statusmask, &s, nn); if (status) return status; stp = openlockstateid(s); if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) { nfs4_put_stateowner(stp->st_stateowner); goto retry; } status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) *stpp = stp; else nfs4_put_stid(&stp->st_stid); return status; } static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, SC_TYPE_OPEN, 0, &stp, nn); if (status) return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } *stpp = stp; return nfs_ok; } __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_open_confirm *oc = &u->open_confirm; __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_confirm on file %pd\n", cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) return status; status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, SC_TYPE_OPEN, 0, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid); nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); return status; } static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) { if (!test_access(access, stp)) return; nfs4_file_put_access(stp->st_stid.sc_file, access); clear_access(access, stp); } static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) { switch (to_access) { case NFS4_SHARE_ACCESS_READ: nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); break; case NFS4_SHARE_ACCESS_WRITE: nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); break; case NFS4_SHARE_ACCESS_BOTH: break; default: WARN_ON_ONCE(1); } } __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_open_downgrade *od = &u->open_downgrade; __be32 status; struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ if (od->od_deleg_want) dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, od->od_deleg_want); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; if (!test_access(od->od_share_access, stp)) { dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto put_stateid; } if (!test_deny(od->od_share_deny, stp)) { dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); reset_union_bmap_deny(od->od_share_deny, stp); nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); return status; } static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; LIST_HEAD(reaplist); struct nfs4_ol_stateid *stp; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); if (clp->cl_minorversion) { if (unhashed) put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); list_for_each_entry(stp, &reaplist, st_locks) nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); return false; } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); return unhashed; } } /* * nfs4_unlock_state() called after encode */ __be32 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_close *close = &u->close; __be32 status; struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool need_move_to_close_list; dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, SC_TYPE_OPEN, SC_STATUS_CLOSED, &stp, nn); nfsd4_bump_seqid(cstate, status); if (status) goto out; spin_lock(&stp->st_stid.sc_client->cl_lock); stp->st_stid.sc_status |= SC_STATUS_CLOSED; spin_unlock(&stp->st_stid.sc_client->cl_lock); /* * Technically we don't _really_ have to increment or copy it, since * it should just be gone after this operation and we clobber the * copied value below, but we continue to do so here just to ensure * that racing ops see that there was a state change. */ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); need_move_to_close_list = nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); if (need_move_to_close_list) move_to_close_lru(stp, net); /* v4.1+ suggests that we send a special stateid in here, since the * clients should just ignore this anyway. Since this is not useful * for v4.0 clients either, we set it to the special close_stateid * universally. * * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5 */ memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid)); /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: return status; } __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_delegreturn *dr = &u->delegreturn; struct nfs4_delegation *dp; stateid_t *stateid = &dr->dr_stateid; struct nfs4_stid *s; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate)); if (status) goto put_stateid; trace_nfsd_deleg_return(stateid); destroy_delegation(dp); smp_mb__after_atomic(); wake_up_var(d_inode(cstate->current_fh.fh_dentry)); put_stateid: nfs4_put_stid(&dp->dl_stid); out: return status; } /* last octet in a range */ static inline u64 last_byte_offset(u64 start, u64 len) { u64 end; WARN_ON_ONCE(!len); end = start + len; return end > start ? end - 1: NFS4_MAX_UINT64; } /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th * byte, because of sign extension problems. Since NFSv4 calls for 64-bit * locking, this prevents us from being completely protocol-compliant. The * real solution to this problem is to start using unsigned file offsets in * the VFS, but this is a very deep change! */ static inline void nfs4_transform_lock_offset(struct file_lock *lock) { if (lock->fl_start < 0) lock->fl_start = OFFSET_MAX; if (lock->fl_end < 0) lock->fl_end = OFFSET_MAX; } static fl_owner_t nfsd4_lm_get_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; nfs4_get_stateowner(&lo->lo_owner); return owner; } static void nfsd4_lm_put_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; if (lo) nfs4_put_stateowner(&lo->lo_owner); } /* return pointer to struct nfs4_client if client is expirable */ static bool nfsd4_lm_lock_expirable(struct file_lock *cfl) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *) cfl->c.flc_owner; struct nfs4_client *clp = lo->lo_owner.so_client; struct nfsd_net *nn; if (try_to_expire_client(clp)) { nn = net_generic(clp->net, nfsd_net_id); mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); return true; } return false; } /* schedule laundromat to run immediately and wait for it to complete */ static void nfsd4_lm_expire_lock(void) { flush_workqueue(laundry_wq); } static void nfsd4_lm_notify(struct file_lock *fl) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *) fl->c.flc_owner; struct net *net = lo->lo_owner.so_client->net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd4_blocked_lock *nbl = container_of(fl, struct nfsd4_blocked_lock, nbl_lock); bool queue = false; /* An empty list means that something else is going to be using it */ spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); queue = true; } spin_unlock(&nn->blocked_locks_lock); if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); nfsd4_try_run_cb(&nbl->nbl_cb); } } static const struct lock_manager_operations nfsd_posix_mng_ops = { .lm_mod_owner = THIS_MODULE, .lm_notify = nfsd4_lm_notify, .lm_get_owner = nfsd4_lm_get_owner, .lm_put_owner = nfsd4_lm_put_owner, .lm_lock_expirable = nfsd4_lm_lock_expirable, .lm_expire_lock = nfsd4_lm_expire_lock, }; static inline void nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) { struct nfs4_lockowner *lo; if (fl->fl_lmops == &nfsd_posix_mng_ops) { lo = (struct nfs4_lockowner *) fl->c.flc_owner; xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, GFP_KERNEL); if (!deny->ld_owner.data) /* We just don't care that much */ goto nevermind; deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { nevermind: deny->ld_owner.len = 0; deny->ld_owner.data = NULL; deny->ld_clientid.cl_boot = 0; deny->ld_clientid.cl_id = 0; } deny->ld_start = fl->fl_start; deny->ld_length = NFS4_MAX_UINT64; if (fl->fl_end != NFS4_MAX_UINT64) deny->ld_length = fl->fl_end - fl->fl_start + 1; deny->ld_type = NFS4_READ_LT; if (fl->c.flc_type != F_RDLCK) deny->ld_type = NFS4_WRITE_LT; } static struct nfs4_lockowner * find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner) { unsigned int strhashval = ownerstr_hashval(owner); struct nfs4_stateowner *so; lockdep_assert_held(&clp->cl_lock); list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], so_strhash) { if (so->so_is_open_owner) continue; if (same_owner_str(so, owner)) return lockowner(nfs4_get_stateowner(so)); } return NULL; } static struct nfs4_lockowner * find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner) { struct nfs4_lockowner *lo; spin_lock(&clp->cl_lock); lo = find_lockowner_str_locked(clp, owner); spin_unlock(&clp->cl_lock); return lo; } static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { unhash_lockowner_locked(lockowner(sop)); } static void nfs4_free_lockowner(struct nfs4_stateowner *sop) { struct nfs4_lockowner *lo = lockowner(sop); kmem_cache_free(lockowner_slab, lo); } static const struct nfs4_stateowner_operations lockowner_ops = { .so_unhash = nfs4_unhash_lockowner, .so_free = nfs4_free_lockowner, }; /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) return NULL; INIT_LIST_HEAD(&lo->lo_blocked); INIT_LIST_HEAD(&lo->lo_owner.so_stateids); lo->lo_owner.so_is_open_owner = 0; lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; spin_lock(&clp->cl_lock); ret = find_lockowner_str_locked(clp, &lock->lk_new_owner); if (ret == NULL) { list_add(&lo->lo_owner.so_strhash, &clp->cl_ownerstr_hashtbl[strhashval]); ret = lo; } else nfs4_free_stateowner(&lo->lo_owner); spin_unlock(&clp->cl_lock); return ret; } static struct nfs4_ol_stateid * find_lock_stateid(const struct nfs4_lockowner *lo, const struct nfs4_ol_stateid *ost) { struct nfs4_ol_stateid *lst; lockdep_assert_held(&ost->st_stid.sc_client->cl_lock); /* If ost is not hashed, ost->st_locks will not be valid */ if (!nfs4_ol_stateid_unhashed(ost)) list_for_each_entry(lst, &ost->st_locks, st_locks) { if (lst->st_stateowner == &lo->lo_owner) { refcount_inc(&lst->st_stid.sc_count); return lst; } } return NULL; } static struct nfs4_ol_stateid * init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, struct nfs4_ol_stateid *open_stp) { struct nfs4_client *clp = lo->lo_owner.so_client; struct nfs4_ol_stateid *retstp; mutex_init(&stp->st_mutex); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); if (nfs4_ol_stateid_unhashed(open_stp)) goto out_close; retstp = find_lock_stateid(lo, open_stp); if (retstp) goto out_found; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = SC_TYPE_LOCK; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; spin_lock(&fp->fi_lock); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); return stp; out_found: spin_unlock(&clp->cl_lock); if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { nfs4_put_stid(&retstp->st_stid); goto retry; } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); return retstp; out_close: spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); return NULL; } static struct nfs4_ol_stateid * find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct inode *inode, struct nfs4_ol_stateid *ost, bool *new) { struct nfs4_stid *ns = NULL; struct nfs4_ol_stateid *lst; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *clp = oo->oo_owner.so_client; *new = false; spin_lock(&clp->cl_lock); lst = find_lock_stateid(lo, ost); spin_unlock(&clp->cl_lock); if (lst != NULL) { if (nfsd4_lock_ol_stateid(lst) == nfs_ok) goto out; nfs4_put_stid(&lst->st_stid); } ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost); if (lst == openlockstateid(ns)) *new = true; else nfs4_put_stid(ns); out: return lst; } static int check_lock_length(u64 offset, u64 length) { return ((length == 0) || ((length != NFS4_MAX_UINT64) && (length > ~offset))); } static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { struct nfs4_file *fp = lock_stp->st_stid.sc_file; lockdep_assert_held(&fp->fi_lock); if (test_access(access, lock_stp)) return; __nfs4_file_get_access(fp, access); set_access(access, lock_stp); } static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **plst, bool *new) { __be32 status; struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; struct inode *inode = d_inode(cstate->current_fh.fh_dentry); struct nfs4_lockowner *lo; struct nfs4_ol_stateid *lst; unsigned int strhashval; lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) { strhashval = ownerstr_hashval(&lock->lk_new_owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; } else { /* with an existing lockowner, seqids must be the same */ status = nfserr_bad_seqid; if (!cstate->minorversion && lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) goto out; } lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (lst == NULL) { status = nfserr_jukebox; goto out; } status = nfs_ok; *plst = lst; out: nfs4_put_stateowner(&lo->lo_owner); return status; } /* * LOCK operation */ __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_lock *lock = &u->lock; struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; struct nfs4_ol_stateid *lock_stp = NULL; struct nfs4_ol_stateid *open_stp = NULL; struct nfs4_file *fp; struct nfsd_file *nf = NULL; struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; __be32 status = 0; int lkflg; int err; bool new = false; unsigned char type; unsigned int flags = FL_POSIX; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, (long long) lock->lk_length); if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status != nfs_ok) return status; if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { status = nfserr_notsupp; goto out; } if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->lk_new_clientid, &cstate->clp->cl_clientid, sizeof(clientid_t)); /* validate and update open stateid and open seqid */ status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, &open_stp, nn); if (status) goto out; mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->lk_new_clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, SC_TYPE_LOCK, 0, &lock_stp, nn); } if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); if (status) goto out; status = nfserr_grace; if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; if (!locks_in_grace(net) && lock->lk_reclaim) goto out; if (lock->lk_reclaim) flags |= FL_RECLAIM; fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); nf = find_readable_file_locked(fp); if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); spin_unlock(&fp->fi_lock); type = F_RDLCK; break; case NFS4_WRITEW_LT: fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); nf = find_writeable_file_locked(fp); if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); spin_unlock(&fp->fi_lock); type = F_WRLCK; break; default: status = nfserr_inval; goto out; } if (!nf) { status = nfserr_openmode; goto out; } if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) && nfsd4_has_session(cstate) && locks_can_async_lock(nf->nf_file->f_op)) flags |= FL_SLEEP; nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); status = nfserr_jukebox; goto out; } file_lock = &nbl->nbl_lock; file_lock->c.flc_type = type; file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); file_lock->c.flc_pid = current->tgid; file_lock->c.flc_file = nf->nf_file; file_lock->c.flc_flags = flags; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = lock->lk_offset; file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); nfs4_transform_lock_offset(file_lock); conflock = locks_alloc_lock(); if (!conflock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto out; } if (flags & FL_SLEEP) { nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock); switch (err) { case 0: /* success! */ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; if (lock->lk_reclaim) nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ status = nfserr_denied; dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); nfs4_set_lock_denied(conflock, &lock->lk_denied); break; case -EDEADLK: status = nfserr_deadlock; break; default: dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); status = nfserrno(err); break; } out: if (nbl) { /* dequeue it if we queued it before */ if (flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list) && !list_empty(&nbl->nbl_lru)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); kref_put(&nbl->nbl_kref, free_nbl); } /* nbl can use one of lists to be linked to reaplist */ spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); } if (nf) nfsd_file_put(nf); if (lock_stp) { /* Bump seqid manually if the 4.0 replay owner is openowner */ if (cstate->replay_owner && cstate->replay_owner != &lock_sop->lo_owner && seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ if (status && new) release_lock_stateid(lock_stp); mutex_unlock(&lock_stp->st_mutex); nfs4_put_stid(&lock_stp->st_stid); } if (open_stp) nfs4_put_stid(&open_stp->st_stid); nfsd4_bump_seqid(cstate, status); if (conflock) locks_free_lock(conflock); return status; } void nfsd4_lock_release(union nfsd4_op_u *u) { struct nfsd4_lock *lock = &u->lock; struct nfsd4_lock_denied *deny = &lock->lk_denied; kfree(deny->ld_owner.data); } /* * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, * so we do a temporary open here just to get an open file to pass to * vfs_test_lock. */ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; inode = fhp->fh_dentry->d_inode; inode_lock(inode); /* to block new leases till after test_lock: */ err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; lock->c.flc_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->c.flc_file = NULL; out: inode_unlock(inode); nfsd_file_put(nf); return err; } /* * LOCKT operation */ __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_lockt *lockt = &u->lockt; struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; if (!nfsd4_has_session(cstate)) { status = set_client(&lockt->lt_clientid, cstate, nn); if (status) goto out; } if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto out; } switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: file_lock->c.flc_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: file_lock->c.flc_type = F_WRLCK; break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); status = nfserr_inval; goto out; } lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); if (lo) file_lock->c.flc_owner = (fl_owner_t)lo; file_lock->c.flc_pid = current->tgid; file_lock->c.flc_flags = FL_POSIX; file_lock->fl_start = lockt->lt_offset; file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); nfs4_transform_lock_offset(file_lock); status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock); if (status) goto out; if (file_lock->c.flc_type != F_UNLCK) { status = nfserr_denied; nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: if (lo) nfs4_put_stateowner(&lo->lo_owner); if (file_lock) locks_free_lock(file_lock); return status; } void nfsd4_lockt_release(union nfsd4_op_u *u) { struct nfsd4_lockt *lockt = &u->lockt; struct nfsd4_lock_denied *deny = &lockt->lt_denied; kfree(deny->ld_owner.data); } __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_locku *locku = &u->locku; struct nfs4_ol_stateid *stp; struct nfsd_file *nf = NULL; struct file_lock *file_lock = NULL; __be32 status; int err; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, SC_TYPE_LOCK, 0, &stp, nn); if (status) goto out; nf = find_any_file(stp->st_stid.sc_file); if (!nf) { status = nfserr_lock_range; goto put_stateid; } if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { status = nfserr_notsupp; goto put_file; } file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto put_file; } file_lock->c.flc_type = F_UNLCK; file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); file_lock->c.flc_pid = current->tgid; file_lock->c.flc_file = nf->nf_file; file_lock->c.flc_flags = FL_POSIX; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = locku->lu_offset; file_lock->fl_end = last_byte_offset(locku->lu_offset, locku->lu_length); nfs4_transform_lock_offset(file_lock); err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); put_file: nfsd_file_put(nf); put_stateid: mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); if (file_lock) locks_free_lock(file_lock); return status; out_nfserr: status = nfserrno(err); goto put_file; } /* * returns * true: locks held by lockowner * false: no locks held by lockowner */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; spin_lock(&fp->fi_lock); nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); goto out; } inode = file_inode(nf->nf_file); flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); for_each_file_lock(fl, &flctx->flc_posix) { if (fl->c.flc_owner == (fl_owner_t)lowner) { status = true; break; } } spin_unlock(&flctx->flc_lock); } out: spin_unlock(&fp->fi_lock); return status; } /** * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations * @rqstp: RPC transaction * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * * Check if there are any locks still held and if not, free the lockowner * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found * %nfserr_locks_held: lockowner still in use * %nfserr_stale_clientid: clientid no longer active * %nfserr_expired: clientid not recognized */ __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_ol_stateid *stp; struct nfs4_lockowner *lo; struct nfs4_client *clp; LIST_HEAD(reaplist); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); status = set_client(clid, cstate, nn); if (status) return status; clp = cstate->clp; spin_lock(&clp->cl_lock); lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); if (!lo) { spin_unlock(&clp->cl_lock); return nfs_ok; } list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { if (check_for_locks(stp->st_stid.sc_file, lo)) { spin_unlock(&clp->cl_lock); nfs4_put_stateowner(&lo->lo_owner); return nfserr_locks_held; } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); unhash_lock_stateid(stp); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); return nfs_ok; } static inline struct nfs4_client_reclaim * alloc_reclaim(void) { return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client_reclaim *crp; crp = nfsd4_find_reclaim_client(name, nn); return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... * * The caller is responsible for freeing name.data if NULL is returned (it * will be freed in nfs4_remove_reclaim_record in the normal case). */ struct nfs4_client_reclaim * nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp; crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); INIT_LIST_HEAD(&crp->cr_strhash); list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); crp->cr_name.data = name.data; crp->cr_name.len = name.len; crp->cr_princhash.data = princhash.data; crp->cr_princhash.len = princhash.len; crp->cr_clp = NULL; nn->reclaim_str_hashtbl_size++; } return crp; } void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) { list_del(&crp->cr_strhash); kfree(crp->cr_name.data); kfree(crp->cr_princhash.data); kfree(crp); nn->reclaim_str_hashtbl_size--; } void nfs4_release_reclaim(struct nfsd_net *nn) { struct nfs4_client_reclaim *crp = NULL; int i; for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->reclaim_str_hashtbl[i])) { crp = list_entry(nn->reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); nfs4_remove_reclaim_record(crp, nn); } } WARN_ON_ONCE(nn->reclaim_str_hashtbl_size); } /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ struct nfs4_client_reclaim * nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { if (compare_blob(&crp->cr_name, &name) == 0) { return crp; } } return NULL; } __be32 nfs4_check_open_reclaim(struct nfs4_client *clp) { if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) return nfserr_no_grace; if (nfsd4_client_record_check(clp)) return nfserr_reclaim_bad; return nfs_ok; } /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has * the inode cached. This becomes an obvious problem the first time a * client's inode cache approaches the size of the server's total memory. * * For now we avoid this problem by imposing a hard limit on the number * of delegations, which varies according to the server's memory size. */ static void set_max_delegations(void) { /* * Allow at most 4 delegations per megabyte of RAM. Quick * estimates suggest that in the worst case (where every delegation * is for a different inode), a delegation could take about 1.5K, * giving a worst case usage of about 6% of memory. */ max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } static int nfs4_state_create_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i; nn->conf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->conf_id_hashtbl) goto err; nn->unconf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id; nn->sessionid_hashtbl = kmalloc_array(SESSION_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->sessionid_hashtbl) goto err_sessionid; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); } for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; nn->boot_time = ktime_get_real_seconds(); nn->grace_ended = false; nn->nfsd4_manager.block_opens = true; INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); spin_lock_init(&nn->s2s_cp_lock); idr_init(&nn->s2s_cp_stateids); atomic_set(&nn->pending_async_copies, 0); spin_lock_init(&nn->blocked_locks_lock); INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client"); if (!nn->nfsd_client_shrinker) goto err_shrinker; nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan; nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count; nn->nfsd_client_shrinker->private_data = nn; shrinker_register(nn->nfsd_client_shrinker); return 0; err_shrinker: put_net(net); kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: kfree(nn->conf_id_hashtbl); err: return -ENOMEM; } static void nfs4_state_destroy_net(struct net *net) { int i; struct nfs4_client *clp = NULL; struct nfsd_net *nn = net_generic(net, nfsd_net_id); for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->conf_id_hashtbl[i])) { clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); destroy_client(clp); } } WARN_ON(!list_empty(&nn->blocked_locks_lru)); for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->unconf_id_hashtbl[i])) { clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); destroy_client(clp); } } kfree(nn->sessionid_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); put_net(net); } int nfs4_state_start_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; ret = nfs4_state_create_net(net); if (ret) return ret; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) goto skip_grace; printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); trace_nfsd_grace_start(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; skip_grace: printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n", net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ); nfsd4_end_grace(nn); return 0; } /* initialization to perform when the nfsd service is started: */ int nfs4_state_start(void) { int ret; ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); if (!nfsd_slot_shrinker) { rhltable_destroy(&nfs4_file_rhltable); return -ENOMEM; } nfsd_slot_shrinker->count_objects = nfsd_slot_count; nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; shrinker_register(nfsd_slot_shrinker); set_max_delegations(); return 0; } void nfs4_state_shutdown_net(struct net *net) { struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); shrinker_free(nn->nfsd_client_shrinker); cancel_work_sync(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); unhash_delegation_locked(dp, SC_STATUS_CLOSED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); destroy_unhashed_deleg(dp); } nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif } void nfs4_state_shutdown(void) { rhltable_destroy(&nfs4_file_rhltable); shrinker_free(nfsd_slot_shrinker); } static void get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); } static void put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (cstate->minorversion) { memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } } void clear_current_stateid(struct nfsd4_compound_state *cstate) { CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } /* * functions to set current state id */ void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->open_downgrade.od_stateid); } void nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->open.op_stateid); } void nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->close.cl_stateid); } void nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->lock.lk_resp_stateid); } /* * functions to consume current state id */ void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->open_downgrade.od_stateid); } void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->delegreturn.dr_stateid); } void nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->free_stateid.fr_stateid); } void nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->setattr.sa_stateid); } void nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->close.cl_stateid); } void nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->locku.lu_stateid); } void nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->read.rd_stateid); } void nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->write.wr_stateid); } /** * nfsd4_vet_deleg_time - vet and set the timespec for a delegated timestamp update * @req: timestamp from the client * @orig: original timestamp in the inode * @now: current time * * Given a timestamp from the client response, check it against the * current timestamp in the inode and the current time. Returns true * if the inode's timestamp needs to be updated, and false otherwise. * @req may also be changed if the timestamp needs to be clamped. */ bool nfsd4_vet_deleg_time(struct timespec64 *req, const struct timespec64 *orig, const struct timespec64 *now) { /* * "When the time presented is before the original time, then the * update is ignored." Also no need to update if there is no change. */ if (timespec64_compare(req, orig) <= 0) return false; /* * "When the time presented is in the future, the server can either * clamp the new time to the current time, or it may * return NFS4ERR_DELAY to the client, allowing it to retry." */ if (timespec64_compare(req, now) > 0) *req = *now; return true; } static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp) { struct inode *inode = d_inode(dentry); struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; struct iattr attrs = { }; int ret; if (deleg_attrs_deleg(dp->dl_type)) { struct timespec64 now = current_time(inode); attrs.ia_atime = ncf->ncf_cb_atime; attrs.ia_mtime = ncf->ncf_cb_mtime; if (nfsd4_vet_deleg_time(&attrs.ia_atime, &dp->dl_atime, &now)) attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; if (nfsd4_vet_deleg_time(&attrs.ia_mtime, &dp->dl_mtime, &now)) { attrs.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; attrs.ia_ctime = attrs.ia_mtime; if (nfsd4_vet_deleg_time(&attrs.ia_ctime, &dp->dl_ctime, &now)) attrs.ia_valid |= ATTR_CTIME | ATTR_CTIME_SET; } } else { attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME; } if (!attrs.ia_valid) return 0; attrs.ia_valid |= ATTR_DELEG; inode_lock(inode); ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); inode_unlock(inode); return ret; } /** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @dentry: dentry of inode to be checked for a conflict * @pdp: returned WRITE delegation, if one was found * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server * must either use the CB_GETATTR to get the current values of the * attributes from the client that holds the delegation or recall the * delegation before replying to the GETATTR. See RFC 8881 section * 18.7.4. * * Returns 0 if there is no conflict; otherwise an nfs_stat * code is returned. If @pdp is set to a non-NULL value, then the * caller must put the reference. */ __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_delegation **pdp) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; struct nfs4_delegation *dp = NULL; struct file_lease *fl; struct nfs4_cb_fattr *ncf; struct inode *inode = d_inode(dentry); ctx = locks_inode_context(inode); if (!ctx) return nfs_ok; #define NON_NFSD_LEASE ((void *)1) spin_lock(&ctx->flc_lock); for_each_file_lock(fl, &ctx->flc_lease) { if (fl->c.flc_flags == FL_LAYOUT) continue; if (fl->c.flc_type == F_WRLCK) { if (fl->fl_lmops == &nfsd_lease_mng_ops) dp = fl->c.flc_owner; else dp = NON_NFSD_LEASE; } break; } if (dp == NULL || dp == NON_NFSD_LEASE || dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { spin_unlock(&ctx->flc_lock); if (dp == NON_NFSD_LEASE) { status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) return status; } return 0; } nfsd_stats_wdeleg_getattr_inc(nn); refcount_inc(&dp->dl_stid.sc_count); ncf = &dp->dl_cb_fattr; nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) goto out_status; } if (!ncf->ncf_file_modified && (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) ncf->ncf_file_modified = true; if (ncf->ncf_file_modified) { int err; /* * Per section 10.4.3 of RFC 8881, the server would * not update the file's metadata with the client's * modified size */ err = cb_getattr_update_times(dentry, dp); if (err) { status = nfserrno(err); goto out_status; } ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; *pdp = dp; return nfs_ok; } status = nfs_ok; out_status: nfs4_put_stid(&dp->dl_stid); return status; }
3 226 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SHRINKER_H #define _LINUX_SHRINKER_H #include <linux/atomic.h> #include <linux/types.h> #include <linux/refcount.h> #include <linux/completion.h> #define SHRINKER_UNIT_BITS BITS_PER_LONG /* * Bitmap and deferred work of shrinker::id corresponding to memcg-aware * shrinkers, which have elements charged to the memcg. */ struct shrinker_info_unit { atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); }; struct shrinker_info { struct rcu_head rcu; int map_nr_max; struct shrinker_info_unit *unit[]; }; /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. */ struct shrink_control { gfp_t gfp_mask; /* current node being shrunk (for NUMA aware shrinkers) */ int nid; /* * How many objects scan_objects should scan and try to reclaim. * This is reset before every call, so it is safe for callees * to modify. */ unsigned long nr_to_scan; /* * How many objects did scan_objects process? * This defaults to nr_to_scan before every call, but the callee * should track its actual progress. */ unsigned long nr_scanned; /* current memcg being shrunk (for memcg aware shrinkers) */ struct mem_cgroup *memcg; }; #define SHRINK_STOP (~0UL) #define SHRINK_EMPTY (~0UL - 1) /* * A callback you can register to apply pressure to ageable caches. * * @count_objects should return the number of freeable items in the cache. If * there are no objects to free, it should return SHRINK_EMPTY, while 0 is * returned in cases of the number of freeable items cannot be determined * or shrinker should skip this cache for this time (e.g., their number * is below shrinkable limit). No deadlock checks should be done during the * count callback - the shrinker relies on aggregating scan counts that couldn't * be executed due to potential deadlocks to be run at a later call when the * deadlock condition is no longer pending. * * @scan_objects will only be called if @count_objects returned a non-zero * value for the number of freeable objects. The callout should scan the cache * and attempt to free items from the cache. It should then return the number * of objects freed during the scan, or SHRINK_STOP if progress cannot be made * due to potential deadlocks. If SHRINK_STOP is returned, then no further * attempts to call the @scan_objects will be made from the current reclaim * context. * * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc); unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc); long batch; /* reclaim batch size, 0 = default */ int seeks; /* seeks to recreate an obj */ unsigned flags; /* * The reference count of this shrinker. Registered shrinker have an * initial refcount of 1, then the lookup operations are now allowed * to use it via shrinker_try_get(). Later in the unregistration step, * the initial refcount will be discarded, and will free the shrinker * asynchronously via RCU after its refcount reaches 0. */ refcount_t refcount; struct completion done; /* use to wait for refcount to reach 0 */ struct rcu_head rcu; void *private_data; /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG /* ID in shrinker_idr */ int id; #endif #ifdef CONFIG_SHRINKER_DEBUG int debugfs_id; const char *name; struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ atomic_long_t *nr_deferred; }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ /* Internal flags */ #define SHRINKER_REGISTERED BIT(0) #define SHRINKER_ALLOCATED BIT(1) /* Flags for users to use */ #define SHRINKER_NUMA_AWARE BIT(2) #define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ #define SHRINKER_NONSLAB BIT(4) __printf(2, 3) struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); static inline bool shrinker_try_get(struct shrinker *shrinker) { return refcount_inc_not_zero(&shrinker->refcount); } static inline void shrinker_put(struct shrinker *shrinker) { if (refcount_dec_and_test(&shrinker->refcount)) complete(&shrinker->done); } #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { return 0; } #endif /* CONFIG_SHRINKER_DEBUG */ #endif /* _LINUX_SHRINKER_H */
10 3 1 72 27 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_HUGETLB_H #define _ASM_GENERIC_HUGETLB_H #include <linux/swap.h> #include <linux/swapops.h> static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); } static inline unsigned long huge_pte_dirty(pte_t pte) { return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { return pte_mkwrite_novma(pte); } #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT static inline pte_t huge_pte_wrprotect(pte_t pte) { return pte_wrprotect(pte); } #endif static inline pte_t huge_pte_mkdirty(pte_t pte) { return pte_mkdirty(pte); } static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { return pte_modify(pte, newprot); } #ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) { return pte_clear_uffd_wp(pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP static inline int huge_pte_uffd_wp(pte_t pte) { return pte_uffd_wp(pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { pte_clear(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { set_pte_at(mm, addr, ptep, pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { return ptep_get_and_clear(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return ptep_clear_flush(vma, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_NONE static inline int huge_pte_none(pte_t pte) { return pte_none(pte); } #endif /* Please refer to comments above pte_none_mostly() for the usage */ #ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { return huge_pte_none(pte) || is_pte_marker(pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep_set_wrprotect(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { return ptep_set_access_flags(vma, addr, ptep, pte, dirty); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { return ptep_get(ptep); } #endif #ifndef __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED static inline bool gigantic_page_runtime_supported(void) { return IS_ENABLED(CONFIG_ARCH_HAS_GIGANTIC_PAGE); } #endif /* __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED */ #endif /* _ASM_GENERIC_HUGETLB_H */
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_NAT_H #define _NF_NAT_H #include <linux/list.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter/nf_conntrack_pptp.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <uapi/linux/netfilter/nf_nat.h> enum nf_nat_manip_type { NF_NAT_MANIP_SRC, NF_NAT_MANIP_DST }; /* SRC manip occurs POST_ROUTING or LOCAL_IN */ #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \ (hooknum) != NF_INET_LOCAL_IN) /* per conntrack: nat application helper private data */ union nf_conntrack_nat_help { /* insert nat helper private data here */ #if IS_ENABLED(CONFIG_NF_NAT_PPTP) struct nf_nat_pptp nat_pptp_info; #endif }; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) int masq_index; #endif }; /* Set up the info structure to map into this range. */ unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype); extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum); struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_NAT) return nf_ct_ext_find(ct, NF_CT_EXT_NAT); #else return NULL; #endif } static inline bool nf_nat_oif_changed(unsigned int hooknum, enum ip_conntrack_info ctinfo, struct nf_conn_nat *nat, const struct net_device *out) { #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; #else return false; #endif } int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, unsigned int ops_count); unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb); unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, enum nf_nat_manip_type mtype, enum ip_conntrack_dir dir); void nf_nat_csum_recalc(struct sk_buff *skb, u8 nfproto, u8 proto, void *data, __sum16 *check, int datalen, int oldlen); int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen); int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops); void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops); unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, int *action, const struct nf_nat_range2 *range, bool commit); static inline int nf_nat_initialized(const struct nf_conn *ct, enum nf_nat_manip_type manip) { if (manip == NF_NAT_MANIP_SRC) return ct->status & IPS_SRC_NAT_DONE; else return ct->status & IPS_DST_NAT_DONE; } #endif
1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 // SPDX-License-Identifier: GPL-2.0-only /* Driver for Realtek USB card reader * * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. * * Author: * Roger Tseng <rogerable@realtek.com> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/usb.h> #include <linux/platform_device.h> #include <linux/mfd/core.h> #include <linux/rtsx_usb.h> static int polling_pipe = 1; module_param(polling_pipe, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); static const struct mfd_cell rtsx_usb_cells[] = { [RTSX_USB_SD_CARD] = { .name = DRV_NAME_RTSX_USB_SDMMC, .pdata_size = 0, }, [RTSX_USB_MS_CARD] = { .name = DRV_NAME_RTSX_USB_MS, .pdata_size = 0, }, }; static void rtsx_usb_sg_timed_out(struct timer_list *t) { struct rtsx_ucr *ucr = timer_container_of(ucr, t, sg_timer); dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); usb_sg_cancel(&ucr->current_sg); } static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, unsigned int pipe, struct scatterlist *sg, int num_sg, unsigned int length, unsigned int *act_len, int timeout) { int ret; dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", __func__, length, num_sg); ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, sg, num_sg, length, GFP_NOIO); if (ret) return ret; ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); add_timer(&ucr->sg_timer); usb_sg_wait(&ucr->current_sg); if (!timer_delete_sync(&ucr->sg_timer)) ret = -ETIMEDOUT; else ret = ucr->current_sg.status; if (act_len) *act_len = ucr->current_sg.bytes; return ret; } int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, void *buf, unsigned int len, int num_sg, unsigned int *act_len, int timeout) { if (timeout < 600) timeout = 600; if (num_sg) return rtsx_usb_bulk_transfer_sglist(ucr, pipe, (struct scatterlist *)buf, num_sg, len, act_len, timeout); else return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, timeout); } EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, u16 addr, u16 len, u8 seq_type) { rtsx_usb_cmd_hdr_tag(ucr); ucr->cmd_buf[PACKET_TYPE] = seq_type; ucr->cmd_buf[5] = (u8)(len >> 8); ucr->cmd_buf[6] = (u8)len; ucr->cmd_buf[8] = (u8)(addr >> 8); ucr->cmd_buf[9] = (u8)addr; if (seq_type == SEQ_WRITE) ucr->cmd_buf[STAGE_FLAG] = 0; else ucr->cmd_buf[STAGE_FLAG] = STAGE_R; } static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, u16 addr, u16 len, u8 *data) { u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); if (!data) return -EINVAL; if (cmd_len > IOBUF_SIZE) return -EINVAL; rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); return rtsx_usb_transfer_data(ucr, usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), ucr->cmd_buf, cmd_len, 0, NULL, 100); } static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, u16 addr, u16 len, u8 *data) { int i, ret; u16 rsp_len = round_down(len, 4); u16 res_len = len - rsp_len; if (!data) return -EINVAL; /* 4-byte aligned part */ if (rsp_len) { rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); ret = rtsx_usb_transfer_data(ucr, usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), ucr->cmd_buf, 12, 0, NULL, 100); if (ret) return ret; ret = rtsx_usb_transfer_data(ucr, usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), data, rsp_len, 0, NULL, 100); if (ret) return ret; } /* unaligned part */ for (i = 0; i < res_len; i++) { ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, data + rsp_len + i); if (ret) return ret; } return 0; } int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) { return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); } EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) { return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); } EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) { u16 value, index; addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; value = swab16(addr); index = mask | data << 8; return usb_control_msg(ucr->pusb_dev, usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 100); } EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) { u16 value; u8 *buf; int ret; if (!data) return -EINVAL; buf = kzalloc(sizeof(u8), GFP_KERNEL); if (!buf) return -ENOMEM; addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; value = swab16(addr); ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, buf, 1, 100); *data = *buf; kfree(buf); return ret; } EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, u8 mask, u8 data) { int i; if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { i = CMD_OFFSET + ucr->cmd_idx * 4; ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | (u8)((reg_addr >> 8) & 0x3F); ucr->cmd_buf[i++] = (u8)reg_addr; ucr->cmd_buf[i++] = mask; ucr->cmd_buf[i++] = data; ucr->cmd_idx++; } } EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) { int ret; ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); ucr->cmd_buf[STAGE_FLAG] = flag; ret = rtsx_usb_transfer_data(ucr, usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, 0, NULL, timeout); if (ret) { rtsx_usb_clear_fsm_err(ucr); return ret; } return 0; } EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) { if (rsp_len <= 0) return -EINVAL; rsp_len = ALIGN(rsp_len, 4); return rtsx_usb_transfer_data(ucr, usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), ucr->rsp_buf, rsp_len, 0, NULL, timeout); } EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) { int ret; rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); if (ret) return ret; ret = rtsx_usb_get_rsp(ucr, 2, 100); if (ret) return ret; *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | ((ucr->rsp_buf[1] & 0x03) << 4); return 0; } int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; u16 *buf; if (!status) return -EINVAL; if (polling_pipe == 0) { buf = kzalloc(sizeof(u16), GFP_KERNEL); if (!buf) return -ENOMEM; ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_POLL, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, 2, 100); *status = *buf; kfree(buf); } else { ret = rtsx_usb_get_status_with_bulk(ucr, status); } /* usb_control_msg may return positive when success */ if (ret < 0) return ret; return 0; } EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) { dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", val, addr); rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, (addr >> 4) & 0x0F); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); return rtsx_usb_send_cmd(ucr, MODE_C, 100); } int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) { rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); return rtsx_usb_send_cmd(ucr, MODE_C, 100); } EXPORT_SYMBOL_GPL(rtsx_usb_write_register); int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) { int ret; if (data != NULL) *data = 0; rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); if (ret) return ret; ret = rtsx_usb_get_rsp(ucr, 1, 100); if (ret) return ret; if (data != NULL) *data = ucr->rsp_buf[0]; return 0; } EXPORT_SYMBOL_GPL(rtsx_usb_read_register); static inline u8 double_ssc_depth(u8 depth) { return (depth > 1) ? (depth - 1) : depth; } static u8 revise_ssc_depth(u8 ssc_depth, u8 div) { if (div > CLK_DIV_1) { if (ssc_depth > div - 1) ssc_depth -= (div - 1); else ssc_depth = SSC_DEPTH_2M; } return ssc_depth; } int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) { int ret; u8 n, clk_divider, mcu_cnt, div; if (!card_clock) { ucr->cur_clk = 0; return 0; } if (initial_mode) { /* We use 250k(around) here, in initial stage */ clk_divider = SD_CLK_DIVIDE_128; card_clock = 30000000; } else { clk_divider = SD_CLK_DIVIDE_0; } ret = rtsx_usb_write_register(ucr, SD_CFG1, SD_CLK_DIVIDE_MASK, clk_divider); if (ret < 0) return ret; card_clock /= 1000000; dev_dbg(&ucr->pusb_intf->dev, "Switch card clock to %dMHz\n", card_clock); if (!initial_mode && double_clk) card_clock *= 2; dev_dbg(&ucr->pusb_intf->dev, "Internal SSC clock: %dMHz (cur_clk = %d)\n", card_clock, ucr->cur_clk); if (card_clock == ucr->cur_clk) return 0; /* Converting clock value into internal settings: n and div */ n = card_clock - 2; if ((card_clock <= 2) || (n > MAX_DIV_N)) return -EINVAL; mcu_cnt = 60/card_clock + 3; if (mcu_cnt > 15) mcu_cnt = 15; /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ div = CLK_DIV_1; while (n < MIN_DIV_N && div < CLK_DIV_4) { n = (n + 2) * 2 - 2; div++; } dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); if (double_clk) ssc_depth = double_ssc_depth(ssc_depth); ssc_depth = revise_ssc_depth(ssc_depth, div); dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); rtsx_usb_init_cmd(ucr); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, 0x3F, (div << 4) | mcu_cnt); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, SSC_DEPTH_MASK, ssc_depth); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); if (vpclk) { rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, PHASE_NOT_RESET, 0); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, PHASE_NOT_RESET, PHASE_NOT_RESET); } ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); if (ret < 0) return ret; ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); if (ret < 0) return ret; /* Wait SSC clock stable */ usleep_range(100, 1000); ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); if (ret < 0) return ret; ucr->cur_clk = card_clock; return 0; } EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) { int ret; u16 val; u16 cd_mask[] = { [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) }; ret = rtsx_usb_get_card_status(ucr, &val); /* * If get status fails, return 0 (ok) for the exclusive check * and let the flow fail at somewhere else. */ if (ret) return 0; if (val & cd_mask[card]) return -EIO; return 0; } EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) { int ret; u8 val; rtsx_usb_init_cmd(ucr); if (CHECK_PKG(ucr, LQFP48)) { rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, LDO3318_PWR_MASK, LDO_SUSPEND); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, FORCE_LDO_POWERB, FORCE_LDO_POWERB); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, 0x30, 0x10); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, 0x03, 0x01); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, 0x0C, 0x04); } rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, SD30_DRIVE_MASK, DRIVER_TYPE_D); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); if (ucr->is_rts5179) rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, 0x03, 0x01); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, XD_INT | MS_INT | SD_INT, XD_INT | MS_INT | SD_INT); ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); if (ret) return ret; /* config OCP */ rtsx_usb_write_register(ucr, OCPCTL, MS_OCP_DETECT_EN, MS_OCP_DETECT_EN); rtsx_usb_write_register(ucr, OCPPARA1, 0xF0, 0x50); rtsx_usb_write_register(ucr, OCPPARA2, 0x7, 0x3); /* config non-crystal mode */ rtsx_usb_read_register(ucr, CFG_MODE, &val); if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); if (ret) return ret; } return 0; } static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) { int ret; u8 val; rtsx_usb_clear_fsm_err(ucr); /* power on SSC */ ret = rtsx_usb_write_register(ucr, FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); if (ret) return ret; usleep_range(100, 1000); ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); if (ret) return ret; /* determine IC version */ ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); if (ret) return ret; ucr->ic_version = val & HW_VER_MASK; /* determine package */ ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); if (ret) return ret; if (val & CARD_SHARE_LQFP_SEL) { ucr->package = LQFP48; dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); } else { ucr->package = QFN24; dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); } /* determine IC variations */ rtsx_usb_read_register(ucr, CFG_MODE_1, &val); if (val & RTS5179) { ucr->is_rts5179 = true; dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); } else { ucr->is_rts5179 = false; } return rtsx_usb_reset_chip(ucr); } static int rtsx_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(intf); struct rtsx_ucr *ucr; int ret; dev_dbg(&intf->dev, ": Realtek USB Card Reader found at bus %03d address %03d\n", usb_dev->bus->busnum, usb_dev->devnum); ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); if (!ucr) return -ENOMEM; ucr->pusb_dev = usb_dev; ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); if (!ucr->cmd_buf) return -ENOMEM; ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); if (!ucr->rsp_buf) { ret = -ENOMEM; goto out_free_cmd_buf; } usb_set_intfdata(intf, ucr); ucr->vendor_id = id->idVendor; ucr->product_id = id->idProduct; mutex_init(&ucr->dev_mutex); ucr->pusb_intf = intf; /* initialize */ ret = rtsx_usb_init_chip(ucr); if (ret) goto out_init_fail; /* initialize USB SG transfer timer */ timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0); ret = mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells, ARRAY_SIZE(rtsx_usb_cells)); if (ret) goto out_init_fail; #ifdef CONFIG_PM intf->needs_remote_wakeup = 1; usb_enable_autosuspend(usb_dev); #endif return 0; out_init_fail: usb_set_intfdata(ucr->pusb_intf, NULL); kfree(ucr->rsp_buf); ucr->rsp_buf = NULL; out_free_cmd_buf: kfree(ucr->cmd_buf); ucr->cmd_buf = NULL; return ret; } static void rtsx_usb_disconnect(struct usb_interface *intf) { struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); dev_dbg(&intf->dev, "%s called\n", __func__); mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); kfree(ucr->cmd_buf); ucr->cmd_buf = NULL; kfree(ucr->rsp_buf); ucr->rsp_buf = NULL; } #ifdef CONFIG_PM static int rtsx_usb_resume_child(struct device *dev, void *data) { pm_request_resume(dev); return 0; } static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) { struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); u16 val = 0; dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n", __func__, message.event); if (PMSG_IS_AUTO(message)) { if (mutex_trylock(&ucr->dev_mutex)) { rtsx_usb_get_card_status(ucr, &val); mutex_unlock(&ucr->dev_mutex); /* Defer the autosuspend if card exists */ if (val & (SD_CD | MS_CD)) { device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return -EAGAIN; } else { /* if the card does not exists, clear OCP status */ rtsx_usb_write_register(ucr, OCPCTL, MS_OCP_CLEAR, MS_OCP_CLEAR); } } else { /* There is an ongoing operation*/ return -EAGAIN; } } return 0; } static int rtsx_usb_resume(struct usb_interface *intf) { device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return 0; } static int rtsx_usb_reset_resume(struct usb_interface *intf) { struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); rtsx_usb_reset_chip(ucr); device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return 0; } #else /* CONFIG_PM */ #define rtsx_usb_suspend NULL #define rtsx_usb_resume NULL #define rtsx_usb_reset_resume NULL #endif /* CONFIG_PM */ static int rtsx_usb_pre_reset(struct usb_interface *intf) { struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); mutex_lock(&ucr->dev_mutex); return 0; } static int rtsx_usb_post_reset(struct usb_interface *intf) { struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); mutex_unlock(&ucr->dev_mutex); return 0; } static const struct usb_device_id rtsx_usb_usb_ids[] = { { USB_DEVICE(0x0BDA, 0x0129) }, { USB_DEVICE(0x0BDA, 0x0139) }, { USB_DEVICE(0x0BDA, 0x0140) }, { } }; MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); static struct usb_driver rtsx_usb_driver = { .name = DRV_NAME_RTSX_USB, .probe = rtsx_usb_probe, .disconnect = rtsx_usb_disconnect, .suspend = rtsx_usb_suspend, .resume = rtsx_usb_resume, .reset_resume = rtsx_usb_reset_resume, .pre_reset = rtsx_usb_pre_reset, .post_reset = rtsx_usb_post_reset, .id_table = rtsx_usb_usb_ids, .supports_autosuspend = 1, .soft_unbind = 1, }; module_usb_driver(rtsx_usb_driver); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Roger Tseng <rogerable@realtek.com>"); MODULE_DESCRIPTION("Realtek USB Card Reader Driver");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 // SPDX-License-Identifier: GPL-2.0-only /* * Sally Floyd's High Speed TCP (RFC 3649) congestion control * * See https://www.icir.org/floyd/hstcp.html * * John Heffner <jheffner@psc.edu> */ #include <linux/module.h> #include <net/tcp.h> /* From AIMD tables from RFC 3649 appendix B, * with fixed-point MD scaled <<8. */ static const struct hstcp_aimd_val { unsigned int cwnd; unsigned int md; } hstcp_aimd_vals[] = { { 38, 128, /* 0.50 */ }, { 118, 112, /* 0.44 */ }, { 221, 104, /* 0.41 */ }, { 347, 98, /* 0.38 */ }, { 495, 93, /* 0.37 */ }, { 663, 89, /* 0.35 */ }, { 851, 86, /* 0.34 */ }, { 1058, 83, /* 0.33 */ }, { 1284, 81, /* 0.32 */ }, { 1529, 78, /* 0.31 */ }, { 1793, 76, /* 0.30 */ }, { 2076, 74, /* 0.29 */ }, { 2378, 72, /* 0.28 */ }, { 2699, 71, /* 0.28 */ }, { 3039, 69, /* 0.27 */ }, { 3399, 68, /* 0.27 */ }, { 3778, 66, /* 0.26 */ }, { 4177, 65, /* 0.26 */ }, { 4596, 64, /* 0.25 */ }, { 5036, 62, /* 0.25 */ }, { 5497, 61, /* 0.24 */ }, { 5979, 60, /* 0.24 */ }, { 6483, 59, /* 0.23 */ }, { 7009, 58, /* 0.23 */ }, { 7558, 57, /* 0.22 */ }, { 8130, 56, /* 0.22 */ }, { 8726, 55, /* 0.22 */ }, { 9346, 54, /* 0.21 */ }, { 9991, 53, /* 0.21 */ }, { 10661, 52, /* 0.21 */ }, { 11358, 52, /* 0.20 */ }, { 12082, 51, /* 0.20 */ }, { 12834, 50, /* 0.20 */ }, { 13614, 49, /* 0.19 */ }, { 14424, 48, /* 0.19 */ }, { 15265, 48, /* 0.19 */ }, { 16137, 47, /* 0.19 */ }, { 17042, 46, /* 0.18 */ }, { 17981, 45, /* 0.18 */ }, { 18955, 45, /* 0.18 */ }, { 19965, 44, /* 0.17 */ }, { 21013, 43, /* 0.17 */ }, { 22101, 43, /* 0.17 */ }, { 23230, 42, /* 0.17 */ }, { 24402, 41, /* 0.16 */ }, { 25618, 41, /* 0.16 */ }, { 26881, 40, /* 0.16 */ }, { 28193, 39, /* 0.16 */ }, { 29557, 39, /* 0.15 */ }, { 30975, 38, /* 0.15 */ }, { 32450, 38, /* 0.15 */ }, { 33986, 37, /* 0.15 */ }, { 35586, 36, /* 0.14 */ }, { 37253, 36, /* 0.14 */ }, { 38992, 35, /* 0.14 */ }, { 40808, 35, /* 0.14 */ }, { 42707, 34, /* 0.13 */ }, { 44694, 33, /* 0.13 */ }, { 46776, 33, /* 0.13 */ }, { 48961, 32, /* 0.13 */ }, { 51258, 32, /* 0.13 */ }, { 53677, 31, /* 0.12 */ }, { 56230, 30, /* 0.12 */ }, { 58932, 30, /* 0.12 */ }, { 61799, 29, /* 0.12 */ }, { 64851, 28, /* 0.11 */ }, { 68113, 28, /* 0.11 */ }, { 71617, 27, /* 0.11 */ }, { 75401, 26, /* 0.10 */ }, { 79517, 26, /* 0.10 */ }, { 84035, 25, /* 0.10 */ }, { 89053, 24, /* 0.10 */ }, }; #define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals) struct hstcp { u32 ai; }; static void hstcp_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; /* Ensure the MD arithmetic works. This is somewhat pedantic, * since I don't think we will see a cwnd this large. :) */ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* Update AIMD parameters. * * We want to guarantee that: * hstcp_aimd_vals[ca->ai-1].cwnd < * snd_cwnd <= * hstcp_aimd_vals[ca->ai].cwnd */ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) { while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) { while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } /* Do additive increase */ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { /* cwnd = cwnd + a(w) / cwnd */ tp->snd_cwnd_cnt += ca->ai + 1; if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp); tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } } } } static u32 hstcp_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hstcp_cong_avoid, .owner = THIS_MODULE, .name = "highspeed" }; static int __init hstcp_register(void) { BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } static void __exit hstcp_unregister(void) { tcp_unregister_congestion_control(&tcp_highspeed); } module_init(hstcp_register); module_exit(hstcp_unregister); MODULE_AUTHOR("John Heffner"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("High Speed TCP");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H #include <linux/alloc_tag.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> #include <linux/llist.h> #include <asm/page.h> /* pgprot_t */ #include <linux/rbtree.h> #include <linux/overflow.h> #include <asm/vmalloc.h> struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ struct iov_iter; /* in uio.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) #define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ #else #define VM_DEFER_KMEMLEAK 0 #endif #define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ /* * Maximum alignment for ioremap() regions. * Can be overridden by arch-specific value. */ #ifndef IOREMAP_MAX_ORDER #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ #endif struct vm_struct { struct vm_struct *next; void *addr; unsigned long size; unsigned long flags; struct page **pages; #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC unsigned int page_order; #endif unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; unsigned long requested_size; }; struct vmap_area { unsigned long va_start; unsigned long va_end; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ /* * The following two variables can be packed, because * a vmap_area object can be either: * 1) in "free" tree (root is free_vmap_area_root) * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ }; unsigned long flags; /* mark type of vm_map_ram area */ }; /* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ #ifndef arch_vmap_p4d_supported static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pmd_supported static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pte_range_map_size static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, u64 pfn, unsigned int max_page_shift) { return PAGE_SIZE; } #endif #ifndef arch_vmap_pte_range_unmap_size static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr, pte_t *ptep) { return PAGE_SIZE; } #endif #ifndef arch_vmap_pte_supported_shift static inline int arch_vmap_pte_supported_shift(unsigned long size) { return PAGE_SHIFT; } #endif #ifndef arch_vmap_pgprot_tagged static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) { return prot; } #endif /* * Highlevel APIs for driver use */ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); #define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) extern void *vzalloc_noprof(unsigned long size) __alloc_size(1); #define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__)) extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1); #define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__)) extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1); #define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__)) extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1); #define vzalloc_node(...) alloc_hooks(vzalloc_node_noprof(__VA_ARGS__)) extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1); #define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__)) extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1); #define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__)) extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); #define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__)) extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) __alloc_size(1); #define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__)) void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1); #define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__)) static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) { return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE); } extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__)) extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__)) extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) void *__must_check vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, gfp_t flags, int nid) __realloc_size(2); #define vrealloc_node_noprof(_p, _s, _f, _nid) \ vrealloc_node_align_noprof(_p, _s, 1, _f, _nid) #define vrealloc_noprof(_p, _s, _f) \ vrealloc_node_align_noprof(_p, _s, 1, _f, NUMA_NO_NODE) #define vrealloc_node_align(...) alloc_hooks(vrealloc_node_align_noprof(__VA_ARGS__)) #define vrealloc_node(...) alloc_hooks(vrealloc_node_noprof(__VA_ARGS__)) #define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot); extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); /* * Lowlevel-APIs (not for driver use!) */ static inline size_t get_vm_area_size(const struct vm_struct *area) { if (!(area->flags & VM_NO_GUARD)) /* return actual size without guard page */ return area->size - PAGE_SIZE; else return area->size; } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { /* * This may not 100% tell if the area is mapped with > PAGE_SIZE * page table entries, if for some reason the architecture indicates * larger sizes are available but decides not to use them, nothing * prevents that. This only indicates the size of the physical page * allocated in the vmalloc layer. */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC return find_vm_area(addr)->page_order > 0; #else return false; #endif } /* for /proc/kcore */ long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. */ __init void vm_area_add_early(struct vm_struct *vm); __init void vm_area_register_early(struct vm_struct *vm, size_t align); int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) unsigned long vmalloc_nr_pages(void); int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } #else /* !CONFIG_MMU */ #define VMALLOC_TOTAL 0UL static inline unsigned long vmalloc_nr_pages(void) { return 0; } static inline void set_vm_flush_reset_perms(void *addr) {} #endif /* CONFIG_MMU */ #if defined(CONFIG_MMU) && defined(CONFIG_SMP) struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); # else static inline struct vm_struct ** pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align) { return NULL; } static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {} #endif #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } #endif #endif /* _LINUX_VMALLOC_H */
2 2 1 4 2 2 2 2 6 6 8 8 7 5 8 5 5 3 5 5 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 // SPDX-License-Identifier: GPL-2.0-or-later /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ #include <net/flow.h> #include <net/ip.h> #include "ipvlan.h" static u32 ipvlan_jhash_secret __read_mostly; void ipvlan_init_secret(void) { net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); } void ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast) { if (likely(success)) { struct ipvl_pcpu_stats *pcptr; pcptr = this_cpu_ptr(ipvlan->pcpu_stats); u64_stats_update_begin(&pcptr->syncp); u64_stats_inc(&pcptr->rx_pkts); u64_stats_add(&pcptr->rx_bytes, len); if (mcast) u64_stats_inc(&pcptr->rx_mcast); u64_stats_update_end(&pcptr->syncp); } else { this_cpu_inc(ipvlan->pcpu_stats->rx_errs); } } EXPORT_SYMBOL_GPL(ipvlan_count_rx); #if IS_ENABLED(CONFIG_IPV6) static u8 ipvlan_get_v6_hash(const void *iaddr) { const struct in6_addr *ip6_addr = iaddr; return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & IPVLAN_HASH_MASK; } #else static u8 ipvlan_get_v6_hash(const void *iaddr) { return 0; } #endif static u8 ipvlan_get_v4_hash(const void *iaddr) { const struct in_addr *ip4_addr = iaddr; return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & IPVLAN_HASH_MASK; } static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr) { if (!is_v6 && addr->atype == IPVL_IPV4) { struct in_addr *i4addr = (struct in_addr *)iaddr; return addr->ip4addr.s_addr == i4addr->s_addr; #if IS_ENABLED(CONFIG_IPV6) } else if (is_v6 && addr->atype == IPVL_IPV6) { struct in6_addr *i6addr = (struct in6_addr *)iaddr; return ipv6_addr_equal(&addr->ip6addr, i6addr); #endif } return false; } static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, const void *iaddr, bool is_v6) { struct ipvl_addr *addr; u8 hash; hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : ipvlan_get_v4_hash(iaddr); hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) if (addr_equal(is_v6, addr, iaddr)) return addr; return NULL; } void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) { struct ipvl_port *port = ipvlan->port; u8 hash; hash = (addr->atype == IPVL_IPV6) ? ipvlan_get_v6_hash(&addr->ip6addr) : ipvlan_get_v4_hash(&addr->ip4addr); if (hlist_unhashed(&addr->hlnode)) hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); } void ipvlan_ht_addr_del(struct ipvl_addr *addr) { hlist_del_init_rcu(&addr->hlnode); } struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6) { struct ipvl_addr *addr, *ret = NULL; rcu_read_lock(); list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { if (addr_equal(is_v6, addr, iaddr)) { ret = addr; break; } } rcu_read_unlock(); return ret; } bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) { struct ipvl_dev *ipvlan; bool ret = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { ret = true; break; } } rcu_read_unlock(); return ret; } void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) { void *lyr3h = NULL; switch (skb->protocol) { case htons(ETH_P_ARP): { struct arphdr *arph; if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev)))) return NULL; arph = arp_hdr(skb); *type = IPVL_ARP; lyr3h = arph; break; } case htons(ETH_P_IP): { u32 pktlen; struct iphdr *ip4h; if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) return NULL; ip4h = ip_hdr(skb); pktlen = skb_ip_totlen(skb); if (ip4h->ihl < 5 || ip4h->version != 4) return NULL; if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) return NULL; *type = IPVL_IPV4; lyr3h = ip4h; break; } #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): { struct ipv6hdr *ip6h; if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) return NULL; ip6h = ipv6_hdr(skb); if (ip6h->version != 6) return NULL; *type = IPVL_IPV6; lyr3h = ip6h; /* Only Neighbour Solicitation pkts need different treatment */ if (ipv6_addr_any(&ip6h->saddr) && ip6h->nexthdr == NEXTHDR_ICMP) { struct icmp6hdr *icmph; if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)))) return NULL; ip6h = ipv6_hdr(skb); icmph = (struct icmp6hdr *)(ip6h + 1); if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { /* Need to access the ipv6 address in body */ if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph) + sizeof(struct in6_addr)))) return NULL; ip6h = ipv6_hdr(skb); icmph = (struct icmp6hdr *)(ip6h + 1); } *type = IPVL_ICMPV6; lyr3h = icmph; } break; } #endif default: return NULL; } return lyr3h; } unsigned int ipvlan_mac_hash(const unsigned char *addr) { u32 hash = jhash_1word(get_unaligned((u32 *)(addr + 2)), ipvlan_jhash_secret); return hash & IPVLAN_MAC_FILTER_MASK; } void ipvlan_process_multicast(struct work_struct *work) { struct ipvl_port *port = container_of(work, struct ipvl_port, wq); struct ethhdr *ethh; struct ipvl_dev *ipvlan; struct sk_buff *skb, *nskb; struct sk_buff_head list; unsigned int len; unsigned int mac_hash; int ret; u8 pkt_type; bool tx_pkt; __skb_queue_head_init(&list); spin_lock_bh(&port->backlog.lock); skb_queue_splice_tail_init(&port->backlog, &list); spin_unlock_bh(&port->backlog.lock); while ((skb = __skb_dequeue(&list)) != NULL) { struct net_device *dev = skb->dev; bool consumed = false; ethh = eth_hdr(skb); tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; mac_hash = ipvlan_mac_hash(ethh->h_dest); if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) pkt_type = PACKET_BROADCAST; else pkt_type = PACKET_MULTICAST; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { if (tx_pkt && (ipvlan->dev == skb->dev)) continue; if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; if (!(ipvlan->dev->flags & IFF_UP)) continue; ret = NET_RX_DROP; len = skb->len + ETH_HLEN; nskb = skb_clone(skb, GFP_ATOMIC); local_bh_disable(); if (nskb) { consumed = true; nskb->pkt_type = pkt_type; nskb->dev = ipvlan->dev; if (tx_pkt) ret = dev_forward_skb(ipvlan->dev, nskb); else ret = netif_rx(nskb); } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); local_bh_enable(); } rcu_read_unlock(); if (tx_pkt) { /* If the packet originated here, send it out. */ skb->dev = port->dev; skb->pkt_type = pkt_type; dev_queue_xmit(skb); } else { if (consumed) consume_skb(skb); else kfree_skb(skb); } dev_put(dev); cond_resched(); } } static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev) { bool xnet = true; if (dev) xnet = !net_eq(dev_net(skb->dev), dev_net(dev)); skb_scrub_packet(skb, xnet); if (dev) skb->dev = dev; } static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, bool local) { struct ipvl_dev *ipvlan = addr->master; struct net_device *dev = ipvlan->dev; unsigned int len; rx_handler_result_t ret = RX_HANDLER_CONSUMED; bool success = false; struct sk_buff *skb = *pskb; len = skb->len + ETH_HLEN; /* Only packets exchanged between two local slaves need to have * device-up check as well as skb-share check. */ if (local) { if (unlikely(!(dev->flags & IFF_UP))) { kfree_skb(skb); goto out; } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; *pskb = skb; } if (local) { skb->pkt_type = PACKET_HOST; if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) success = true; } else { skb->dev = dev; ret = RX_HANDLER_ANOTHER; success = true; } out: ipvlan_count_rx(ipvlan, len, success, false); return ret; } struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, int addr_type, bool use_dest) { struct ipvl_addr *addr = NULL; switch (addr_type) { #if IS_ENABLED(CONFIG_IPV6) case IPVL_IPV6: { struct ipv6hdr *ip6h; struct in6_addr *i6addr; ip6h = (struct ipv6hdr *)lyr3h; i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; addr = ipvlan_ht_addr_lookup(port, i6addr, true); break; } case IPVL_ICMPV6: { struct nd_msg *ndmh; struct in6_addr *i6addr; /* Make sure that the NeighborSolicitation ICMPv6 packets * are handled to avoid DAD issue. */ ndmh = (struct nd_msg *)lyr3h; if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { i6addr = &ndmh->target; addr = ipvlan_ht_addr_lookup(port, i6addr, true); } break; } #endif case IPVL_IPV4: { struct iphdr *ip4h; __be32 *i4addr; ip4h = (struct iphdr *)lyr3h; i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; addr = ipvlan_ht_addr_lookup(port, i4addr, false); break; } case IPVL_ARP: { struct arphdr *arph; unsigned char *arp_ptr; __be32 dip; arph = (struct arphdr *)lyr3h; arp_ptr = (unsigned char *)(arph + 1); if (use_dest) arp_ptr += (2 * port->dev->addr_len) + 4; else arp_ptr += port->dev->addr_len; memcpy(&dip, arp_ptr, 4); addr = ipvlan_ht_addr_lookup(port, &dip, false); break; } } return addr; } static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; const struct iphdr *ip4h; struct rtable *rt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, }; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) goto err; ip4h = ip_hdr(skb); fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; fl4.flowi4_dscp = ip4h_dscp(ip4h); rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); goto err; } skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); err = ip_local_out(net, NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; goto out; err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); out: return ret; } #if IS_ENABLED(CONFIG_IPV6) static noinline_for_stack int ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowi6_flags = FLOWI_FLAG_ANYSRC, .flowlabel = ip6_flowinfo(ip6h), .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; struct dst_entry *dst; int err; dst = ip6_route_output(dev_net(dev), NULL, &fl6); err = dst->error; if (err) { dst_release(dst); return err; } skb_dst_set(skb, dst); return 0; } static int ipvlan_process_v6_outbound(struct sk_buff *skb) { struct net_device *dev = skb->dev; int err, ret = NET_XMIT_DROP; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) { DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return ret; } err = ipvlan_route_v6_outbound(dev, skb); if (unlikely(err)) { DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return err; } memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); err = ip6_local_out(dev_net(dev), NULL, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; return ret; } #else static int ipvlan_process_v6_outbound(struct sk_buff *skb) { return NET_XMIT_DROP; } #endif static int ipvlan_process_outbound(struct sk_buff *skb) { int ret = NET_XMIT_DROP; /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. */ if (skb_mac_header_was_set(skb)) { /* In this mode we dont care about * multicast and broadcast traffic */ struct ethhdr *ethh = eth_hdr(skb); if (is_multicast_ether_addr(ethh->h_dest)) { pr_debug_ratelimited( "Dropped {multi|broad}cast of type=[%x]\n", ntohs(skb->protocol)); kfree_skb(skb); goto out; } skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); } if (skb->protocol == htons(ETH_P_IPV6)) ret = ipvlan_process_v6_outbound(skb); else if (skb->protocol == htons(ETH_P_IP)) ret = ipvlan_process_v4_outbound(skb); else { pr_warn_ratelimited("Dropped outbound packet type=%x\n", ntohs(skb->protocol)); kfree_skb(skb); } out: return ret; } static void ipvlan_multicast_enqueue(struct ipvl_port *port, struct sk_buff *skb, bool tx_pkt) { if (skb->protocol == htons(ETH_P_PAUSE)) { kfree_skb(skb); return; } /* Record that the deferred packet is from TX or RX path. By * looking at mac-addresses on packet will lead to erronus decisions. * (This would be true for a loopback-mode on master device or a * hair-pin mode of the switch.) */ IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; spin_lock(&port->backlog.lock); if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { dev_hold(skb->dev); __skb_queue_tail(&port->backlog, skb); spin_unlock(&port->backlog.lock); schedule_work(&port->wq); } else { spin_unlock(&port->backlog.lock); dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); } } static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); void *lyr3h; struct ipvl_addr *addr; int addr_type; lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); if (!lyr3h) goto out; if (!ipvlan_is_vepa(ipvlan->port)) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); if (addr) { if (ipvlan_is_private(ipvlan->port)) { consume_skb(skb); return NET_XMIT_DROP; } ipvlan_rcv_frame(addr, &skb, true); return NET_XMIT_SUCCESS; } } out: ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); return ipvlan_process_outbound(skb); } static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); struct ethhdr *eth = skb_eth_hdr(skb); struct ipvl_addr *addr; void *lyr3h; int addr_type; if (!ipvlan_is_vepa(ipvlan->port) && ether_addr_equal(eth->h_dest, eth->h_source)) { lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); if (lyr3h) { addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); if (addr) { if (ipvlan_is_private(ipvlan->port)) { consume_skb(skb); return NET_XMIT_DROP; } ipvlan_rcv_frame(addr, &skb, true); return NET_XMIT_SUCCESS; } } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return NET_XMIT_DROP; /* Packet definitely does not belong to any of the * virtual devices, but the dest is local. So forward * the skb for the main-dev. At the RX side we just return * RX_PASS for it to be processed further on the stack. */ dev_forward_skb(ipvlan->phy_dev, skb); return NET_XMIT_SUCCESS; } else if (is_multicast_ether_addr(eth->h_dest)) { skb_reset_mac_header(skb); ipvlan_skb_crossing_ns(skb, NULL); ipvlan_multicast_enqueue(ipvlan->port, skb, true); return NET_XMIT_SUCCESS; } skb->dev = ipvlan->phy_dev; return dev_queue_xmit(skb); } int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); if (!port) goto out; if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) goto out; switch(port->mode) { case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: #ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: #endif return ipvlan_xmit_mode_l3(skb, dev); } /* Should not reach here */ WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); out: kfree_skb(skb); return NET_XMIT_DROP; } static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) { struct ethhdr *eth = eth_hdr(skb); struct ipvl_addr *addr; void *lyr3h; int addr_type; if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); if (!lyr3h) return true; addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); if (addr) return false; } return true; } static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, struct ipvl_port *port) { void *lyr3h; int addr_type; struct ipvl_addr *addr; struct sk_buff *skb = *pskb; rx_handler_result_t ret = RX_HANDLER_PASS; lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); if (!lyr3h) goto out; addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); if (addr) ret = ipvlan_rcv_frame(addr, pskb, false); out: return ret; } static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, struct ipvl_port *port) { struct sk_buff *skb = *pskb; struct ethhdr *eth = eth_hdr(skb); rx_handler_result_t ret = RX_HANDLER_PASS; if (is_multicast_ether_addr(eth->h_dest)) { if (ipvlan_external_frame(skb, port)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); /* External frames are queued for device local * distribution, but a copy is given to master * straight away to avoid sending duplicates later * when work-queue processes this frame. This is * achieved by returning RX_HANDLER_PASS. */ if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); ipvlan_multicast_enqueue(port, nskb, false); } } } else { /* Perform like l3 mode for non-multicast packet */ ret = ipvlan_handle_mode_l3(pskb, port); } return ret; } rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); if (!port) return RX_HANDLER_PASS; switch (port->mode) { case IPVLAN_MODE_L2: return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); #ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: return RX_HANDLER_PASS; #endif } /* Should not reach here */ WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); kfree_skb(skb); return RX_HANDLER_CONSUMED; }
2 2 2 2 2 2 2 2 2 2 3 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 /* * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> #include <linux/llist.h> #include "rds_single_path.h" #include "ib_mr.h" #include "rds.h" struct workqueue_struct *rds_ib_mr_wq; static void rds_ib_odp_mr_worker(struct work_struct *work); static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; struct rds_ib_ipaddr *i_ipaddr; rcu_read_lock(); list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } } } rcu_read_unlock(); return NULL; } static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) { struct rds_ib_ipaddr *i_ipaddr; i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); if (!i_ipaddr) return -ENOMEM; i_ipaddr->ipaddr = ipaddr; spin_lock_irq(&rds_ibdev->spinlock); list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list); spin_unlock_irq(&rds_ibdev->spinlock); return 0; } static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) { struct rds_ib_ipaddr *i_ipaddr; struct rds_ib_ipaddr *to_free = NULL; spin_lock_irq(&rds_ibdev->spinlock); list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { list_del_rcu(&i_ipaddr->list); to_free = i_ipaddr; break; } } spin_unlock_irq(&rds_ibdev->spinlock); if (to_free) kfree_rcu(to_free, rcu); } int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, struct in6_addr *ipaddr) { struct rds_ib_device *rds_ibdev_old; rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]); if (!rds_ibdev_old) return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); if (rds_ibdev_old != rds_ibdev) { rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]); rds_ib_dev_put(rds_ibdev_old); return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); } rds_ib_dev_put(rds_ibdev_old); return 0; } void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; /* conn was previously on the nodev_conns_list */ spin_lock_irq(&ib_nodev_conns_lock); BUG_ON(list_empty(&ib_nodev_conns)); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); spin_lock(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); spin_unlock(&rds_ibdev->spinlock); spin_unlock_irq(&ib_nodev_conns_lock); ic->rds_ibdev = rds_ibdev; refcount_inc(&rds_ibdev->refcount); } void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; /* place conn on nodev_conns_list */ spin_lock(&ib_nodev_conns_lock); spin_lock_irq(&rds_ibdev->spinlock); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); spin_unlock_irq(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &ib_nodev_conns); spin_unlock(&ib_nodev_conns_lock); ic->rds_ibdev = NULL; rds_ib_dev_put(rds_ibdev); } void rds_ib_destroy_nodev_conns(void) { struct rds_ib_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&ib_nodev_conns_lock); list_splice(&ib_nodev_conns, &tmp_list); spin_unlock_irq(&ib_nodev_conns_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) rds_conn_destroy(ic->conn); } void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) { struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo->rdma_mr_max = pool_1m->max_items; iinfo->rdma_mr_size = pool_1m->max_pages; } #if IS_ENABLED(CONFIG_IPV6) void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds6_info_rdma_connection *iinfo6) { struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo6->rdma_mr_max = pool_1m->max_items; iinfo6->rdma_mr_size = pool_1m->max_pages; } #endif struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; struct llist_node *ret; unsigned long flags; spin_lock_irqsave(&pool->clean_lock, flags); ret = llist_del_first(&pool->clean_list); spin_unlock_irqrestore(&pool->clean_lock, flags); if (ret) { ibmr = llist_entry(ret, struct rds_ib_mr, llnode); if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_reused); else rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); } return ibmr; } void rds_ib_sync_mr(void *trans_private, int direction) { struct rds_ib_mr *ibmr = trans_private; struct rds_ib_device *rds_ibdev = ibmr->device; if (ibmr->odp) return; switch (direction) { case DMA_FROM_DEVICE: ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, ibmr->sg_dma_len, DMA_BIDIRECTIONAL); break; case DMA_TO_DEVICE: ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, ibmr->sg_dma_len, DMA_BIDIRECTIONAL); break; } } void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) { struct rds_ib_device *rds_ibdev = ibmr->device; if (ibmr->sg_dma_len) { ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len, DMA_BIDIRECTIONAL); ibmr->sg_dma_len = 0; } /* Release the s/g list */ if (ibmr->sg_len) { unsigned int i; for (i = 0; i < ibmr->sg_len; ++i) { struct page *page = sg_page(&ibmr->sg[i]); /* FIXME we need a way to tell a r/w MR * from a r/o MR */ WARN_ON(!page->mapping && irqs_disabled()); set_page_dirty(page); put_page(page); } kfree(ibmr->sg); ibmr->sg = NULL; ibmr->sg_len = 0; } } void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) { unsigned int pinned = ibmr->sg_len; __rds_ib_teardown_mr(ibmr); if (pinned) { struct rds_ib_mr_pool *pool = ibmr->pool; atomic_sub(pinned, &pool->free_pinned); } } static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) { unsigned int item_count; item_count = atomic_read(&pool->item_count); if (free_all) return item_count; return 0; } /* * given an llist of mrs, put them all into the list_head for more processing */ static unsigned int llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; struct llist_node *node; struct llist_node *next; unsigned int count = 0; node = llist_del_all(llist); while (node) { next = node->next; ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); node = next; count++; } return count; } /* * this takes a list head of mrs and turns it into linked llist nodes * of clusters. Each cluster has linked llist nodes of * MR_CLUSTER_SIZE mrs that are ready for reuse. */ static void list_to_llist_nodes(struct list_head *list, struct llist_node **nodes_head, struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; struct llist_node *cur = NULL; struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { cur = &ibmr->llnode; *next = cur; next = &cur->next; } *next = NULL; *nodes_tail = cur; } /* * Flush our pool of MRs. * At a minimum, all currently unused MRs are unmapped. * If the number of MRs allocated exceeds the limit, we also try * to free as many MRs as needed to get back to this limit. */ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr; struct llist_node *clean_nodes; struct llist_node *clean_tail; LIST_HEAD(unmap_list); unsigned long unpinned = 0; unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush); if (ibmr_ret) { DEFINE_WAIT(wait); while (!mutex_trylock(&pool->flush_lock)) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; finish_wait(&pool->flush_wait, &wait); goto out_nolock; } prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; finish_wait(&pool->flush_wait, &wait); goto out_nolock; } } finish_wait(&pool->flush_wait, &wait); } else mutex_lock(&pool->flush_lock); if (ibmr_ret) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; goto out; } } /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) { unsigned long flags; spin_lock_irqsave(&pool->clean_lock, flags); llist_append_to_list(&pool->clean_list, &unmap_list); spin_unlock_irqrestore(&pool->clean_lock, flags); } free_goal = rds_ib_flush_goal(pool, free_all); if (list_empty(&unmap_list)) goto out; rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { unsigned long flags; list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); clean_nodes = clean_nodes->next; } /* more than one entry in llist nodes */ if (clean_nodes) { spin_lock_irqsave(&pool->clean_lock, flags); llist_add_batch(clean_nodes, clean_tail, &pool->clean_list); spin_unlock_irqrestore(&pool->clean_lock, flags); } } atomic_sub(unpinned, &pool->free_pinned); atomic_sub(dirty_to_clean, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); out: mutex_unlock(&pool->flush_lock); if (waitqueue_active(&pool->flush_wait)) wake_up(&pool->flush_wait); out_nolock: return 0; } struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; int iter = 0; while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) return ibmr; if (atomic_inc_return(&pool->item_count) <= pool->max_items) break; atomic_dec(&pool->item_count); if (++iter > 2) { if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); break; } /* We do have some empty MRs. Flush them out. */ if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait); rds_ib_flush_mr_pool(pool, 0, &ibmr); if (ibmr) return ibmr; } return NULL; } static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); rds_ib_flush_mr_pool(pool, 0, NULL); } void rds_ib_free_mr(void *trans_private, int invalidate) { struct rds_ib_mr *ibmr = trans_private; struct rds_ib_mr_pool *pool = ibmr->pool; struct rds_ib_device *rds_ibdev = ibmr->device; rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); if (ibmr->odp) { /* A MR created and marked as use_once. We use delayed work, * because there is a change that we are in interrupt and can't * call to ib_dereg_mr() directly. */ INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker); queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0); return; } /* Return it to the pool's free list */ rds_ib_free_frmr_list(ibmr); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 5) queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { rds_ib_flush_mr_pool(pool, 0, NULL); } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); } } rds_ib_dev_put(rds_ibdev); } void rds_ib_flush_mrs(void) { struct rds_ib_device *rds_ibdev; down_read(&rds_ib_devices_lock); list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { if (rds_ibdev->mr_8k_pool) rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL); if (rds_ibdev->mr_1m_pool) rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL); } up_read(&rds_ib_devices_lock); } u32 rds_ib_get_lkey(void *trans_private) { struct rds_ib_mr *ibmr = trans_private; return ibmr->u.mr->lkey; } void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_sock *rs, u32 *key_ret, struct rds_connection *conn, u64 start, u64 length, int need_odp) { struct rds_ib_device *rds_ibdev; struct rds_ib_mr *ibmr = NULL; struct rds_ib_connection *ic = NULL; int ret; rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]); if (!rds_ibdev) { ret = -ENODEV; goto out; } if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) { u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start; int access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_ON_DEMAND); struct ib_sge sge = {}; struct ib_mr *ib_mr; if (!rds_ibdev->odp_capable) { ret = -EOPNOTSUPP; goto out; } ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr, access_flags); if (IS_ERR(ib_mr)) { rdsdebug("rds_ib_get_user_mr returned %d\n", IS_ERR(ib_mr)); ret = PTR_ERR(ib_mr); goto out; } if (key_ret) *key_ret = ib_mr->rkey; ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); if (!ibmr) { ib_dereg_mr(ib_mr); ret = -ENOMEM; goto out; } ibmr->u.mr = ib_mr; ibmr->odp = 1; sge.addr = virt_addr; sge.length = length; sge.lkey = ib_mr->lkey; ib_advise_mr(rds_ibdev->pd, IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sge, 1); return ibmr; } if (conn) ic = conn->c_transport_data; if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; goto out; } ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); if (IS_ERR(ibmr)) { ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); } else { return ibmr; } out: if (rds_ibdev) rds_ib_dev_put(rds_ibdev); return ERR_PTR(ret); } void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { cancel_delayed_work_sync(&pool->flush_worker); rds_ib_flush_mr_pool(pool, 1, NULL); WARN_ON(atomic_read(&pool->item_count)); WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, int pool_type) { struct rds_ib_mr_pool *pool; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return ERR_PTR(-ENOMEM); pool->pool_type = pool_type; init_llist_head(&pool->free_list); init_llist_head(&pool->drop_list); init_llist_head(&pool->clean_list); spin_lock_init(&pool->clean_lock); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_8k_mrs; } pool->max_free_pinned = pool->max_items * pool->max_pages / 4; pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; return pool; } int rds_ib_mr_init(void) { rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!rds_ib_mr_wq) return -ENOMEM; return 0; } /* By the time this is called all the IB devices should have been torn down and * had their pools freed. As each pool is freed its work struct is waited on, * so the pool flushing work queue should be idle by the time we get here. */ void rds_ib_mr_exit(void) { destroy_workqueue(rds_ib_mr_wq); } static void rds_ib_odp_mr_worker(struct work_struct *work) { struct rds_ib_mr *ibmr; ibmr = container_of(work, struct rds_ib_mr, work.work); ib_dereg_mr(ibmr->u.mr); kfree(ibmr); }
3248 8 8 8 8 1 1 7 7 7 30 25 6 23 8 8 8 7 6 7 1 1 1 1 1 31 31 24 6 30 1 24 24 3084 3098 3082 3078 3077 2962 2901 2899 2909 2901 2895 2867 2693 2687 2627 306 18 303 302 18 306 327 2 2679 2674 2689 2680 194 194 194 193 1 3055 3124 3064 3067 2684 2626 2654 2689 8 3 5 2885 2885 2899 2881 2888 2893 2883 2888 9 9 1 1 7 2 2887 2887 6 2886 2 11 2887 2997 2990 2996 2982 3005 3 3 2887 2890 3 3 3 159 44 2847 2846 5 189 12 181 181 183 180 43 43 19 32 32 12 12 12 4 4 1 3 3 5 5 1 1 3 3 3 3 3053 3018 3042 3055 3057 3041 120 118 2893 2891 2891 2885 2889 12 2900 2679 2683 2684 2684 2682 2675 2677 2688 2679 2682 2683 1 2679 2606 2607 51 52 51 15 37 2736 2699 2668 21 21 21 21 21 1 20 8 5 5 5 5 2 5 5 5 5 7 7 7 5 2 5 5 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/dir.c - kernfs directory implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/sched.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/hash.h> #include "kernfs-internal.h" /* * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to * call pr_cont() while holding rename_lock. Because sometimes pr_cont() * will perform wakeups when releasing console_sem. Holding rename_lock * will introduce deadlock if the scheduler reads the kernfs_name in the * wakeup path. */ static DEFINE_SPINLOCK(kernfs_pr_cont_lock); static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) static bool __kernfs_active(struct kernfs_node *kn) { return atomic_read(&kn->active) >= 0; } static bool kernfs_active(struct kernfs_node *kn) { lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); return __kernfs_active(kn); } static bool kernfs_lockdep(struct kernfs_node *kn) { #ifdef CONFIG_DEBUG_LOCK_ALLOC return kn->flags & KERNFS_LOCKDEP; #else return false; #endif } /* kernfs_node_depth - compute depth from @from to @to */ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { size_t depth = 0; while (rcu_dereference(to->__parent) && to != from) { depth++; to = rcu_dereference(to->__parent); } return depth; } static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, struct kernfs_node *b) { size_t da, db; struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b); if (ra != rb) return NULL; da = kernfs_depth(ra->kn, a); db = kernfs_depth(rb->kn, b); while (da > db) { a = rcu_dereference(a->__parent); da--; } while (db > da) { b = rcu_dereference(b->__parent); db--; } /* worst case b and a will be the same at root */ while (b != a) { b = rcu_dereference(b->__parent); a = rcu_dereference(a->__parent); } return a; } /** * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to, * where kn_from is treated as root of the path. * @kn_from: kernfs node which should be treated as root for the path * @kn_to: kernfs node to which path is needed * @buf: buffer to copy the path into * @buflen: size of @buf * * We need to handle couple of scenarios here: * [1] when @kn_from is an ancestor of @kn_to at some level * kn_from: /n1/n2/n3 * kn_to: /n1/n2/n3/n4/n5 * result: /n4/n5 * * [2] when @kn_from is on a different hierarchy and we need to find common * ancestor between @kn_from and @kn_to. * kn_from: /n1/n2/n3/n4 * kn_to: /n1/n2/n5 * result: /../../n5 * OR * kn_from: /n1/n2/n3/n4/n5 [depth=5] * kn_to: /n1/n2/n3 [depth=3] * result: /../.. * * [3] when @kn_to is %NULL result will be "(null)" * * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen) { struct kernfs_node *kn, *common; const char parent_str[] = "/.."; size_t depth_from, depth_to, len = 0; ssize_t copied; int i, j; if (!kn_to) return strscpy(buf, "(null)", buflen); if (!kn_from) kn_from = kernfs_root(kn_to)->kn; if (kn_from == kn_to) return strscpy(buf, "/", buflen); common = kernfs_common_ancestor(kn_from, kn_to); if (WARN_ON(!common)) return -EINVAL; depth_to = kernfs_depth(common, kn_to); depth_from = kernfs_depth(common, kn_from); buf[0] = '\0'; for (i = 0; i < depth_from; i++) { copied = strscpy(buf + len, parent_str, buflen - len); if (copied < 0) return copied; len += copied; } /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { const char *name; for (kn = kn_to, j = 0; j < i; j++) kn = rcu_dereference(kn->__parent); name = rcu_dereference(kn->name); len += scnprintf(buf + len, buflen - len, "/%s", name); } return len; } /** * kernfs_name - obtain the name of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * Copies the name of @kn into @buf of @buflen bytes. The behavior is * similar to strscpy(). * * Fills buffer with "(null)" if @kn is %NULL. * * Return: the resulting length of @buf. If @buf isn't long enough, * it's filled up to @buflen-1 and nul terminated, and returns -E2BIG. * * This function can be called from any context. */ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { struct kernfs_node *kn_parent; if (!kn) return strscpy(buf, "(null)", buflen); guard(rcu)(); /* * KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and * the parent is either existing or not. */ kn_parent = rcu_dereference(kn->__parent); return strscpy(buf, kn_parent ? rcu_dereference(kn->name) : "/", buflen); } /** * kernfs_path_from_node - build path of node @to relative to @from. * @from: parent kernfs_node relative to which we need to build the path * @to: kernfs_node of interest * @buf: buffer to copy @to's path into * @buflen: size of @buf * * Builds @to's path relative to @from in @buf. @from and @to must * be on the same kernfs-root. If @from is not parent of @to, then a relative * path (which includes '..'s) as needed to reach from @from to @to is * returned. * * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) { struct kernfs_root *root; guard(rcu)(); if (to) { root = kernfs_root(to); if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) { guard(read_lock_irqsave)(&root->kernfs_rename_lock); return kernfs_path_from_node_locked(to, from, buf, buflen); } } return kernfs_path_from_node_locked(to, from, buf, buflen); } EXPORT_SYMBOL_GPL(kernfs_path_from_node); /** * pr_cont_kernfs_name - pr_cont name of a kernfs_node * @kn: kernfs_node of interest * * This function can be called from any context. */ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; spin_lock_irqsave(&kernfs_pr_cont_lock, flags); kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** * pr_cont_kernfs_path - pr_cont path of a kernfs_node * @kn: kernfs_node of interest * * This function can be called from any context. */ void pr_cont_kernfs_path(struct kernfs_node *kn) { unsigned long flags; int sz; spin_lock_irqsave(&kernfs_pr_cont_lock, flags); sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); if (sz < 0) { if (sz == -E2BIG) pr_cont("(name too long)"); else pr_cont("(error)"); goto out; } pr_cont("%s", kernfs_pr_cont_buf); out: spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** * kernfs_get_parent - determine the parent node and pin it * @kn: kernfs_node of interest * * Determines @kn's parent, pins and returns it. This function can be * called from any context. * * Return: parent node of @kn */ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { struct kernfs_node *parent; struct kernfs_root *root; unsigned long flags; root = kernfs_root(kn); read_lock_irqsave(&root->kernfs_rename_lock, flags); parent = kernfs_parent(kn); kernfs_get(parent); read_unlock_irqrestore(&root->kernfs_rename_lock, flags); return parent; } /** * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash * @ns: Namespace tag to hash * * Return: 31-bit hash of ns + name (so it fits in an off_t) */ static unsigned int kernfs_name_hash(const char *name, const void *ns) { unsigned long hash = init_name_hash(ns); unsigned int len = strlen(name); while (len--) hash = partial_name_hash(*name++, hash); hash = end_name_hash(hash); hash &= 0x7fffffffU; /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ if (hash < 2) hash += 2; if (hash >= INT_MAX) hash = INT_MAX - 1; return hash; } static int kernfs_name_compare(unsigned int hash, const char *name, const void *ns, const struct kernfs_node *kn) { if (hash < kn->hash) return -1; if (hash > kn->hash) return 1; if (ns < kn->ns) return -1; if (ns > kn->ns) return 1; return strcmp(name, kernfs_rcu_name(kn)); } static int kernfs_sd_compare(const struct kernfs_node *left, const struct kernfs_node *right) { return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right); } /** * kernfs_link_sibling - link kernfs_node into sibling rbtree * @kn: kernfs_node of interest * * Link @kn into its sibling rbtree which starts from * @kn->parent->dir.children. * * Locking: * kernfs_rwsem held exclusive * * Return: * %0 on success, -EEXIST on failure. */ static int kernfs_link_sibling(struct kernfs_node *kn) { struct rb_node *parent = NULL; struct kernfs_node *kn_parent; struct rb_node **node; kn_parent = kernfs_parent(kn); node = &kn_parent->dir.children.rb_node; while (*node) { struct kernfs_node *pos; int result; pos = rb_to_kn(*node); parent = *node; result = kernfs_sd_compare(kn, pos); if (result < 0) node = &pos->rb.rb_left; else if (result > 0) node = &pos->rb.rb_right; else return -EEXIST; } /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); rb_insert_color(&kn->rb, &kn_parent->dir.children); /* successfully added, account subdir number */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) kn_parent->dir.subdirs++; kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); return 0; } /** * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree * @kn: kernfs_node of interest * * Try to unlink @kn from its sibling rbtree which starts from * kn->parent->dir.children. * * Return: %true if @kn was actually removed, * %false if @kn wasn't on the rbtree. * * Locking: * kernfs_rwsem held exclusive */ static bool kernfs_unlink_sibling(struct kernfs_node *kn) { struct kernfs_node *kn_parent; if (RB_EMPTY_NODE(&kn->rb)) return false; kn_parent = kernfs_parent(kn); down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) kn_parent->dir.subdirs--; kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); rb_erase(&kn->rb, &kn_parent->dir.children); RB_CLEAR_NODE(&kn->rb); return true; } /** * kernfs_get_active - get an active reference to kernfs_node * @kn: kernfs_node to get an active reference to * * Get an active reference of @kn. This function is noop if @kn * is %NULL. * * Return: * Pointer to @kn on success, %NULL on failure. */ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) { if (unlikely(!kn)) return NULL; if (!atomic_inc_unless_negative(&kn->active)) return NULL; if (kernfs_lockdep(kn)) rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); return kn; } /** * kernfs_put_active - put an active reference to kernfs_node * @kn: kernfs_node to put an active reference to * * Put an active reference to @kn. This function is noop if @kn * is %NULL. */ void kernfs_put_active(struct kernfs_node *kn) { int v; if (unlikely(!kn)) return; if (kernfs_lockdep(kn)) rwsem_release(&kn->dep_map, _RET_IP_); v = atomic_dec_return(&kn->active); if (likely(v != KN_DEACTIVATED_BIAS)) return; wake_up_all(&kernfs_root(kn)->deactivate_waitq); } /** * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain * * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will * return after draining is complete. */ static void kernfs_drain(struct kernfs_node *kn) __releases(&kernfs_root(kn)->kernfs_rwsem) __acquires(&kernfs_root(kn)->kernfs_rwsem) { struct kernfs_root *root = kernfs_root(kn); lockdep_assert_held_write(&root->kernfs_rwsem); WARN_ON_ONCE(kernfs_active(kn)); /* * Skip draining if already fully drained. This avoids draining and its * lockdep annotations for nodes which have never been activated * allowing embedding kernfs_remove() in create error paths without * worrying about draining. */ if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS && !kernfs_should_drain_open_files(kn)) return; up_write(&root->kernfs_rwsem); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) lock_contended(&kn->dep_map, _RET_IP_); } wait_event(root->deactivate_waitq, atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); if (kernfs_lockdep(kn)) { lock_acquired(&kn->dep_map, _RET_IP_); rwsem_release(&kn->dep_map, _RET_IP_); } if (kernfs_should_drain_open_files(kn)) kernfs_drain_open_files(kn); down_write(&root->kernfs_rwsem); } /** * kernfs_get - get a reference count on a kernfs_node * @kn: the target kernfs_node */ void kernfs_get(struct kernfs_node *kn) { if (kn) { WARN_ON(!atomic_read(&kn->count)); atomic_inc(&kn->count); } } EXPORT_SYMBOL_GPL(kernfs_get); static void kernfs_free_rcu(struct rcu_head *rcu) { struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu); /* If the whole node goes away, then name can't be used outside */ kfree_const(rcu_access_pointer(kn->name)); if (kn->iattr) { simple_xattrs_free(&kn->iattr->xattrs, NULL); kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } kmem_cache_free(kernfs_node_cache, kn); } /** * kernfs_put - put a reference count on a kernfs_node * @kn: the target kernfs_node * * Put a reference count of @kn and destroy it if it reached zero. */ void kernfs_put(struct kernfs_node *kn) { struct kernfs_node *parent; struct kernfs_root *root; if (!kn || !atomic_dec_and_test(&kn->count)) return; root = kernfs_root(kn); repeat: /* * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ parent = kernfs_parent(kn); WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", parent ? rcu_dereference(parent->name) : "", rcu_dereference(kn->name), atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&root->kernfs_idr_lock); call_rcu(&kn->rcu, kernfs_free_rcu); kn = parent; if (kn) { if (atomic_dec_and_test(&kn->count)) goto repeat; } else { /* just released the root kn, free @root too */ idr_destroy(&root->ino_idr); kfree_rcu(root, rcu); } } EXPORT_SYMBOL_GPL(kernfs_put); /** * kernfs_node_from_dentry - determine kernfs_node associated with a dentry * @dentry: the dentry in question * * Return: the kernfs_node associated with @dentry. If @dentry is not a * kernfs one, %NULL is returned. * * While the returned kernfs_node will stay accessible as long as @dentry * is accessible, the returned node can be in any state and the caller is * fully responsible for determining what's accessible. */ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { if (dentry->d_sb->s_op == &kernfs_sops) return kernfs_dentry_node(dentry); return NULL; } static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; u32 id_highbits; int ret; name = kstrdup_const(name, GFP_KERNEL); if (!name) return NULL; kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); if (!kn) goto err_out1; idr_preload(GFP_KERNEL); spin_lock(&root->kernfs_idr_lock); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); if (ret >= 0 && ret < root->last_id_lowbits) root->id_highbits++; id_highbits = root->id_highbits; root->last_id_lowbits = ret; spin_unlock(&root->kernfs_idr_lock); idr_preload_end(); if (ret < 0) goto err_out2; kn->id = (u64)id_highbits << 32 | ret; atomic_set(&kn->count, 1); atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); rcu_assign_pointer(kn->name, name); kn->mode = mode; kn->flags = flags; if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) { struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = uid, .ia_gid = gid, }; ret = __kernfs_setattr(kn, &iattr); if (ret < 0) goto err_out3; } if (parent) { ret = security_kernfs_init_security(parent, kn); if (ret) goto err_out3; } return kn; err_out3: spin_lock(&root->kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&root->kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: kfree_const(name); return NULL; } struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; if (parent->mode & S_ISGID) { /* this code block imitates inode_init_owner() for * kernfs */ if (parent->iattr) gid = parent->iattr->ia_gid; if (flags & KERNFS_DIR) mode |= S_ISGID; } kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); rcu_assign_pointer(kn->__parent, parent); } return kn; } /* * kernfs_find_and_get_node_by_id - get kernfs_node from node id * @root: the kernfs root * @id: the target node id * * @id's lower 32bits encode ino and upper gen. If the gen portion is * zero, all generations are matched. * * Return: %NULL on failure, * otherwise a kernfs node with reference counter incremented. */ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id) { struct kernfs_node *kn; ino_t ino = kernfs_id_ino(id); u32 gen = kernfs_id_gen(id); rcu_read_lock(); kn = idr_find(&root->ino_idr, (u32)ino); if (!kn) goto err_unlock; if (sizeof(ino_t) >= sizeof(u64)) { /* we looked up with the low 32bits, compare the whole */ if (kernfs_ino(kn) != ino) goto err_unlock; } else { /* 0 matches all generations */ if (unlikely(gen && kernfs_gen(kn) != gen)) goto err_unlock; } /* * We should fail if @kn has never been activated and guarantee success * if the caller knows that @kn is active. Both can be achieved by * __kernfs_active() which tests @kn->active without kernfs_rwsem. */ if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; rcu_read_unlock(); return kn; err_unlock: rcu_read_unlock(); return NULL; } /** * kernfs_add_one - add kernfs_node to parent without warning * @kn: kernfs_node to be added * * The caller must already have initialized @kn->parent. This * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * * Return: * %0 on success, -EEXIST if entry with the given name already * exists. */ int kernfs_add_one(struct kernfs_node *kn) { struct kernfs_root *root = kernfs_root(kn); struct kernfs_iattrs *ps_iattr; struct kernfs_node *parent; bool has_ns; int ret; down_write(&root->kernfs_rwsem); parent = kernfs_parent(kn); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", has_ns ? "required" : "invalid", kernfs_rcu_name(parent), kernfs_rcu_name(kn))) goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) goto out_unlock; ret = -ENOENT; if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) goto out_unlock; kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns); ret = kernfs_link_sibling(kn); if (ret) goto out_unlock; /* Update timestamps on the parent */ down_write(&root->kernfs_iattr_rwsem); ps_iattr = parent->iattr; if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; } up_write(&root->kernfs_iattr_rwsem); up_write(&root->kernfs_rwsem); /* * Activate the new node unless CREATE_DEACTIVATED is requested. * If not activated here, the kernfs user is responsible for * activating the node with kernfs_activate(). A node which hasn't * been activated is not visible to userland and its removal won't * trigger deactivation. */ if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) kernfs_activate(kn); return 0; out_unlock: up_write(&root->kernfs_rwsem); return ret; } /** * kernfs_find_ns - find kernfs_node with the given name * @parent: kernfs_node to search under * @name: name to look for * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, const void *ns) { struct rb_node *node = parent->dir.children.rb_node; bool has_ns = kernfs_ns_enabled(parent); unsigned int hash; lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem); if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name); return NULL; } hash = kernfs_name_hash(name, ns); while (node) { struct kernfs_node *kn; int result; kn = rb_to_kn(node); result = kernfs_name_compare(hash, name, ns, kn); if (result < 0) node = node->rb_left; else if (result > 0) node = node->rb_right; else return kn; } return NULL; } static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, const void *ns) { ssize_t len; char *p, *name; lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); spin_lock_irq(&kernfs_pr_cont_lock); len = strscpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len < 0) { spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } p = kernfs_pr_cont_buf; while ((name = strsep(&p, "/")) && parent) { if (*name == '\0') continue; parent = kernfs_find_ns(parent, name, ns); } spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } /** * kernfs_find_and_get_ns - find and get kernfs_node with the given name * @parent: kernfs_node to search under * @name: name to look for * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent and get a reference * if found. This function may sleep. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); down_read(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); kernfs_get(kn); up_read(&root->kernfs_rwsem); return kn; } EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** * kernfs_walk_and_get_ns - find and get kernfs_node with the given path * @parent: kernfs_node to search under * @path: path to look for * @ns: the namespace tag to use * * Look for kernfs_node with path @path under @parent and get a reference * if found. This function may sleep. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); down_read(&root->kernfs_rwsem); kn = kernfs_walk_ns(parent, path, ns); kernfs_get(kn); up_read(&root->kernfs_rwsem); return kn; } unsigned int kernfs_root_flags(struct kernfs_node *kn) { return kernfs_root(kn)->flags; } /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * * Return: the root of the new hierarchy on success, ERR_PTR() value on * failure. */ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { struct kernfs_root *root; struct kernfs_node *kn; root = kzalloc(sizeof(*root), GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); idr_init(&root->ino_idr); spin_lock_init(&root->kernfs_idr_lock); init_rwsem(&root->kernfs_rwsem); init_rwsem(&root->kernfs_iattr_rwsem); init_rwsem(&root->kernfs_supers_rwsem); INIT_LIST_HEAD(&root->supers); rwlock_init(&root->kernfs_rename_lock); /* * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino. * High bits generation. The starting value for both ino and * genenration is 1. Initialize upper 32bit allocation * accordingly. */ if (sizeof(ino_t) >= sizeof(u64)) root->id_highbits = 0; else root->id_highbits = 1; kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) { idr_destroy(&root->ino_idr); kfree(root); return ERR_PTR(-ENOMEM); } kn->priv = priv; kn->dir.root = root; root->syscall_ops = scops; root->flags = flags; root->kn = kn; init_waitqueue_head(&root->deactivate_waitq); if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) kernfs_activate(kn); return root; } /** * kernfs_destroy_root - destroy a kernfs hierarchy * @root: root of the hierarchy to destroy * * Destroy the hierarchy anchored at @root by removing all existing * directories and destroying @root. */ void kernfs_destroy_root(struct kernfs_root *root) { /* * kernfs_remove holds kernfs_rwsem from the root so the root * shouldn't be freed during the operation. */ kernfs_get(root->kn); kernfs_remove(root->kn); kernfs_put(root->kn); /* will also free @root */ } /** * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root * @root: root to use to lookup * * Return: @root's kernfs_node */ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) { return root->kn; } /** * kernfs_create_dir_ns - create a directory * @parent: parent in which to create a new directory * @name: name of the new directory * @mode: mode of the new directory * @uid: uid of the new directory * @gid: gid of the new directory * @priv: opaque data associated with the new directory * @ns: optional namespace tag of the directory * * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { struct kernfs_node *kn; int rc; /* allocate */ kn = kernfs_new_node(parent, name, mode | S_IFDIR, uid, gid, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); kn->dir.root = parent->dir.root; kn->ns = ns; kn->priv = priv; /* link in */ rc = kernfs_add_one(kn); if (!rc) return kn; kernfs_put(kn); return ERR_PTR(rc); } /** * kernfs_create_empty_dir - create an always empty directory * @parent: parent in which to create a new directory * @name: name of the new directory * * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name) { struct kernfs_node *kn; int rc; /* allocate */ kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); kn->flags |= KERNFS_EMPTY_DIR; kn->dir.root = parent->dir.root; kn->ns = NULL; kn->priv = NULL; /* link in */ rc = kernfs_add_one(kn); if (!rc) return kn; kernfs_put(kn); return ERR_PTR(rc); } static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn, *parent; struct kernfs_root *root; if (flags & LOOKUP_RCU) return -ECHILD; /* Negative hashed dentry? */ if (d_really_is_negative(dentry)) { /* If the kernfs parent node has changed discard and * proceed to ->lookup. * * There's nothing special needed here when getting the * dentry parent, even if a concurrent rename is in * progress. That's because the dentry is negative so * it can only be the target of the rename and it will * be doing a d_move() not a replace. Consequently the * dentry d_parent won't change over the d_move(). * * Also kernfs negative dentries transitioning from * negative to positive during revalidate won't happen * because they are invalidated on containing directory * changes and the lookup re-done so that a new positive * dentry can be properly created. */ root = kernfs_root_from_sb(dentry->d_sb); down_read(&root->kernfs_rwsem); parent = kernfs_dentry_node(dentry->d_parent); if (parent) { if (kernfs_dir_changed(parent, dentry)) { up_read(&root->kernfs_rwsem); return 0; } } up_read(&root->kernfs_rwsem); /* The kernfs parent node hasn't changed, leave the * dentry negative and return success. */ return 1; } kn = kernfs_dentry_node(dentry); root = kernfs_root(kn); down_read(&root->kernfs_rwsem); /* The kernfs node has been deactivated */ if (!kernfs_active(kn)) goto out_bad; parent = kernfs_parent(kn); /* The kernfs node has been moved? */ if (kernfs_dentry_node(dentry->d_parent) != parent) goto out_bad; /* The kernfs node has been renamed */ if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0) goto out_bad; /* The kernfs node has been moved to a different namespace */ if (parent && kernfs_ns_enabled(parent) && kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; up_read(&root->kernfs_rwsem); return 1; out_bad: up_read(&root->kernfs_rwsem); return 0; } const struct dentry_operations kernfs_dops = { .d_revalidate = kernfs_dop_revalidate, }; static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct kernfs_node *parent = dir->i_private; struct kernfs_node *kn; struct kernfs_root *root; struct inode *inode = NULL; const void *ns = NULL; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dir->i_sb)->ns; kn = kernfs_find_ns(parent, dentry->d_name.name, ns); /* attach dentry and inode */ if (kn) { /* Inactive nodes are invisible to the VFS so don't * create a negative. */ if (!kernfs_active(kn)) { up_read(&root->kernfs_rwsem); return NULL; } inode = kernfs_get_inode(dir->i_sb, kn); if (!inode) inode = ERR_PTR(-ENOMEM); } /* * Needed for negative dentry validation. * The negative dentry can be created in kernfs_iop_lookup() * or transforms from positive dentry in dentry_unlink_inode() * called from vfs_rmdir(). */ if (!IS_ERR(inode)) kernfs_set_rev(parent, dentry); up_read(&root->kernfs_rwsem); /* instantiate and hash (possibly negative) dentry */ return d_splice_alias(inode, dentry); } static struct dentry *kernfs_iop_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct kernfs_node *parent = dir->i_private; struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; int ret; if (!scops || !scops->mkdir) return ERR_PTR(-EPERM); if (!kernfs_get_active(parent)) return ERR_PTR(-ENODEV); ret = scops->mkdir(parent, dentry->d_name.name, mode); kernfs_put_active(parent); return ERR_PTR(ret); } static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) { struct kernfs_node *kn = kernfs_dentry_node(dentry); struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; if (!scops || !scops->rmdir) return -EPERM; if (!kernfs_get_active(kn)) return -ENODEV; ret = scops->rmdir(kn); kernfs_put_active(kn); return ret; } static int kernfs_iop_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct kernfs_node *kn = kernfs_dentry_node(old_dentry); struct kernfs_node *new_parent = new_dir->i_private; struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; if (flags) return -EINVAL; if (!scops || !scops->rename) return -EPERM; if (!kernfs_get_active(kn)) return -ENODEV; if (!kernfs_get_active(new_parent)) { kernfs_put_active(kn); return -ENODEV; } ret = scops->rename(kn, new_parent, new_dentry->d_name.name); kernfs_put_active(new_parent); kernfs_put_active(kn); return ret; } const struct inode_operations kernfs_dir_iops = { .lookup = kernfs_iop_lookup, .permission = kernfs_iop_permission, .setattr = kernfs_iop_setattr, .getattr = kernfs_iop_getattr, .listxattr = kernfs_iop_listxattr, .mkdir = kernfs_iop_mkdir, .rmdir = kernfs_iop_rmdir, .rename = kernfs_iop_rename, }; static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) { struct kernfs_node *last; while (true) { struct rb_node *rbn; last = pos; if (kernfs_type(pos) != KERNFS_DIR) break; rbn = rb_first(&pos->dir.children); if (!rbn) break; pos = rb_to_kn(rbn); } return last; } /** * kernfs_next_descendant_post - find the next descendant for post-order walk * @pos: the current position (%NULL to initiate traversal) * @root: kernfs_node whose descendants to walk * * Find the next descendant to visit for post-order traversal of @root's * descendants. @root is included in the iteration and the last node to be * visited. * * Return: the next descendant to visit or %NULL when done. */ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, struct kernfs_node *root) { struct rb_node *rbn; lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem); /* if first iteration, visit leftmost descendant which may be root */ if (!pos) return kernfs_leftmost_descendant(root); /* if we visited @root, we're done */ if (pos == root) return NULL; /* if there's an unvisited sibling, visit its leftmost descendant */ rbn = rb_next(&pos->rb); if (rbn) return kernfs_leftmost_descendant(rb_to_kn(rbn)); /* no sibling left, visit parent */ return kernfs_parent(pos); } static void kernfs_activate_one(struct kernfs_node *kn) { lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); kn->flags |= KERNFS_ACTIVATED; if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) return; WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb)); WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); } /** * kernfs_activate - activate a node which started deactivated * @kn: kernfs_node whose subtree is to be activated * * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node * needs to be explicitly activated. A node which hasn't been activated * isn't visible to userland and deactivation is skipped during its * removal. This is useful to construct atomic init sequences where * creation of multiple nodes should either succeed or fail atomically. * * The caller is responsible for ensuring that this function is not called * after kernfs_remove*() is invoked on @kn. */ void kernfs_activate(struct kernfs_node *kn) { struct kernfs_node *pos; struct kernfs_root *root = kernfs_root(kn); down_write(&root->kernfs_rwsem); pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) kernfs_activate_one(pos); up_write(&root->kernfs_rwsem); } /** * kernfs_show - show or hide a node * @kn: kernfs_node to show or hide * @show: whether to show or hide * * If @show is %false, @kn is marked hidden and deactivated. A hidden node is * ignored in future activaitons. If %true, the mark is removed and activation * state is restored. This function won't implicitly activate a new node in a * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet. * * To avoid recursion complexities, directories aren't supported for now. */ void kernfs_show(struct kernfs_node *kn, bool show) { struct kernfs_root *root = kernfs_root(kn); if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR)) return; down_write(&root->kernfs_rwsem); if (show) { kn->flags &= ~KERNFS_HIDDEN; if (kn->flags & KERNFS_ACTIVATED) kernfs_activate_one(kn); } else { kn->flags |= KERNFS_HIDDEN; if (kernfs_active(kn)) atomic_add(KN_DEACTIVATED_BIAS, &kn->active); kernfs_drain(kn); } up_write(&root->kernfs_rwsem); } static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos, *parent; /* Short-circuit if non-root @kn has already finished removal. */ if (!kn) return; lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* * This is for kernfs_remove_self() which plays with active ref * after removal. */ if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn)); /* prevent new usage by marking all nodes removing and deactivating */ pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { pos->flags |= KERNFS_REMOVING; if (kernfs_active(pos)) atomic_add(KN_DEACTIVATED_BIAS, &pos->active); } /* deactivate and unlink the subtree node-by-node */ do { pos = kernfs_leftmost_descendant(kn); /* * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's * base ref could have been put by someone else by the time * the function returns. Make sure it doesn't go away * underneath us. */ kernfs_get(pos); kernfs_drain(pos); parent = kernfs_parent(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it * to decide who's responsible for cleanups. */ if (!parent || kernfs_unlink_sibling(pos)) { struct kernfs_iattrs *ps_iattr = parent ? parent->iattr : NULL; /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; } up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); kernfs_put(pos); } kernfs_put(pos); } while (pos != kn); } /** * kernfs_remove - remove a kernfs_node recursively * @kn: the kernfs_node to remove * * Remove @kn along with all its subdirectories and files. */ void kernfs_remove(struct kernfs_node *kn) { struct kernfs_root *root; if (!kn) return; root = kernfs_root(kn); down_write(&root->kernfs_rwsem); __kernfs_remove(kn); up_write(&root->kernfs_rwsem); } /** * kernfs_break_active_protection - break out of active protection * @kn: the self kernfs_node * * The caller must be running off of a kernfs operation which is invoked * with an active reference - e.g. one of kernfs_ops. Each invocation of * this function must also be matched with an invocation of * kernfs_unbreak_active_protection(). * * This function releases the active reference of @kn the caller is * holding. Once this function is called, @kn may be removed at any point * and the caller is solely responsible for ensuring that the objects it * dereferences are accessible. */ void kernfs_break_active_protection(struct kernfs_node *kn) { /* * Take out ourself out of the active ref dependency chain. If * we're called without an active ref, lockdep will complain. */ kernfs_put_active(kn); } /** * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() * @kn: the self kernfs_node * * If kernfs_break_active_protection() was called, this function must be * invoked before finishing the kernfs operation. Note that while this * function restores the active reference, it doesn't and can't actually * restore the active protection - @kn may already or be in the process of * being drained and removed. Once kernfs_break_active_protection() is * invoked, that protection is irreversibly gone for the kernfs operation * instance. * * While this function may be called at any point after * kernfs_break_active_protection() is invoked, its most useful location * would be right before the enclosing kernfs operation returns. */ void kernfs_unbreak_active_protection(struct kernfs_node *kn) { /* * @kn->active could be in any state; however, the increment we do * here will be undone as soon as the enclosing kernfs operation * finishes and this temporary bump can't break anything. If @kn * is alive, nothing changes. If @kn is being deactivated, the * soon-to-follow put will either finish deactivation or restore * deactivated state. If @kn is already removed, the temporary * bump is guaranteed to be gone before @kn is released. */ atomic_inc(&kn->active); if (kernfs_lockdep(kn)) rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); } /** * kernfs_remove_self - remove a kernfs_node from its own method * @kn: the self kernfs_node to remove * * The caller must be running off of a kernfs operation which is invoked * with an active reference - e.g. one of kernfs_ops. This can be used to * implement a file operation which deletes itself. * * For example, the "delete" file for a sysfs device directory can be * implemented by invoking kernfs_remove_self() on the "delete" file * itself. This function breaks the circular dependency of trying to * deactivate self while holding an active ref itself. It isn't necessary * to modify the usual removal path to use kernfs_remove_self(). The * "delete" implementation can simply invoke kernfs_remove_self() on self * before proceeding with the usual removal path. kernfs will ignore later * kernfs_remove() on self. * * kernfs_remove_self() can be called multiple times concurrently on the * same kernfs_node. Only the first one actually performs removal and * returns %true. All others will wait until the kernfs operation which * won self-removal finishes and return %false. Note that the losers wait * for the completion of not only the winning kernfs_remove_self() but also * the whole kernfs_ops which won the arbitration. This can be used to * guarantee, for example, all concurrent writes to a "delete" file to * finish only after the whole operation is complete. * * Return: %true if @kn is removed by this call, otherwise %false. */ bool kernfs_remove_self(struct kernfs_node *kn) { bool ret; struct kernfs_root *root = kernfs_root(kn); down_write(&root->kernfs_rwsem); kernfs_break_active_protection(kn); /* * SUICIDAL is used to arbitrate among competing invocations. Only * the first one will actually perform removal. When the removal * is complete, SUICIDED is set and the active ref is restored * while kernfs_rwsem for held exclusive. The ones which lost * arbitration waits for SUICIDED && drained which can happen only * after the enclosing kernfs operation which executed the winning * instance of kernfs_remove_self() finished. */ if (!(kn->flags & KERNFS_SUICIDAL)) { kn->flags |= KERNFS_SUICIDAL; __kernfs_remove(kn); kn->flags |= KERNFS_SUICIDED; ret = true; } else { wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; DEFINE_WAIT(wait); while (true) { prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); if ((kn->flags & KERNFS_SUICIDED) && atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) break; up_write(&root->kernfs_rwsem); schedule(); down_write(&root->kernfs_rwsem); } finish_wait(waitq, &wait); WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); ret = false; } /* * This must be done while kernfs_rwsem held exclusive; otherwise, * waiting for SUICIDED && deactivated could finish prematurely. */ kernfs_unbreak_active_protection(kn); up_write(&root->kernfs_rwsem); return ret; } /** * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it * @parent: parent of the target * @name: name of the kernfs_node to remove * @ns: namespace tag of the kernfs_node to remove * * Look for the kernfs_node with @name and @ns under @parent and remove it. * * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root; if (!parent) { WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", name); return -ENOENT; } root = kernfs_root(parent); down_write(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); if (kn) { kernfs_get(kn); __kernfs_remove(kn); kernfs_put(kn); } up_write(&root->kernfs_rwsem); if (kn) return 0; else return -ENOENT; } /** * kernfs_rename_ns - move and rename a kernfs_node * @kn: target node * @new_parent: new parent to put @sd under * @new_name: new name * @new_ns: new namespace tag * * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { struct kernfs_node *old_parent; struct kernfs_root *root; const char *old_name; int error; /* can't move or rename root */ if (!rcu_access_pointer(kn->__parent)) return -EINVAL; root = kernfs_root(kn); down_write(&root->kernfs_rwsem); error = -ENOENT; if (!kernfs_active(kn) || !kernfs_active(new_parent) || (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; old_parent = kernfs_parent(kn); if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) { error = -EINVAL; if (WARN_ON_ONCE(old_parent != new_parent)) goto out; } error = 0; old_name = kernfs_rcu_name(kn); if (!new_name) new_name = old_name; if ((old_parent == new_parent) && (kn->ns == new_ns) && (strcmp(old_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; if (kernfs_find_ns(new_parent, new_name, new_ns)) goto out; /* rename kernfs_node */ if (strcmp(old_name, new_name) != 0) { error = -ENOMEM; new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) goto out; } else { new_name = NULL; } /* * Move to the appropriate place in the appropriate directories rbtree. */ kernfs_unlink_sibling(kn); /* rename_lock protects ->parent accessors */ if (old_parent != new_parent) { kernfs_get(new_parent); write_lock_irq(&root->kernfs_rename_lock); rcu_assign_pointer(kn->__parent, new_parent); kn->ns = new_ns; if (new_name) rcu_assign_pointer(kn->name, new_name); write_unlock_irq(&root->kernfs_rename_lock); kernfs_put(old_parent); } else { /* name assignment is RCU protected, parent is the same */ kn->ns = new_ns; if (new_name) rcu_assign_pointer(kn->name, new_name); } kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns); kernfs_link_sibling(kn); if (new_name && !is_kernel_rodata((unsigned long)old_name)) kfree_rcu_mightsleep(old_name); error = 0; out: up_write(&root->kernfs_rwsem); return error; } static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) { kernfs_put(filp->private_data); return 0; } static struct kernfs_node *kernfs_dir_pos(const void *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { int valid = kernfs_active(pos) && rcu_access_pointer(pos->__parent) == parent && hash == pos->hash; kernfs_put(pos); if (!valid) pos = NULL; } if (!pos && (hash > 1) && (hash < INT_MAX)) { struct rb_node *node = parent->dir.children.rb_node; while (node) { pos = rb_to_kn(node); if (hash < pos->hash) node = node->rb_left; else if (hash > pos->hash) node = node->rb_right; else break; } } /* Skip over entries which are dying/dead or in the wrong namespace */ while (pos && (!kernfs_active(pos) || pos->ns != ns)) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); } return pos; } static struct kernfs_node *kernfs_dir_next_pos(const void *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); if (pos) { do { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); } while (pos && (!kernfs_active(pos) || pos->ns != ns)); } return pos; } static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = file->private_data; struct kernfs_root *root; const void *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dentry->d_sb)->ns; for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { const char *name = kernfs_rcu_name(pos); unsigned int type = fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); ctx->pos = pos->hash; file->private_data = pos; kernfs_get(pos); if (!dir_emit(ctx, name, len, ino, type)) { up_read(&root->kernfs_rwsem); return 0; } } up_read(&root->kernfs_rwsem); file->private_data = NULL; ctx->pos = INT_MAX; return 0; } const struct file_operations kernfs_dir_fops = { .read = generic_read_dir, .iterate_shared = kernfs_fop_readdir, .release = kernfs_dir_fop_release, .llseek = generic_file_llseek, };
5 5 5 5 3 2 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Public Key Signature Algorithm * * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/sig.h> #include <linux/cryptouser.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/string.h> #include <net/netlink.h> #include "internal.h" static void crypto_sig_exit_tfm(struct crypto_tfm *tfm) { struct crypto_sig *sig = __crypto_sig_tfm(tfm); struct sig_alg *alg = crypto_sig_alg(sig); alg->exit(sig); } static int crypto_sig_init_tfm(struct crypto_tfm *tfm) { struct crypto_sig *sig = __crypto_sig_tfm(tfm); struct sig_alg *alg = crypto_sig_alg(sig); if (alg->exit) sig->base.exit = crypto_sig_exit_tfm; if (alg->init) return alg->init(sig); return 0; } static void crypto_sig_free_instance(struct crypto_instance *inst) { struct sig_instance *sig = sig_instance(inst); sig->free(sig); } static void __maybe_unused crypto_sig_show(struct seq_file *m, struct crypto_alg *alg) { seq_puts(m, "type : sig\n"); } static int __maybe_unused crypto_sig_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_sig rsig = {}; strscpy(rsig.type, "sig", sizeof(rsig.type)); return nla_put(skb, CRYPTOCFGA_REPORT_SIG, sizeof(rsig), &rsig); } static const struct crypto_type crypto_sig_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_sig_init_tfm, .free = crypto_sig_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_sig_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_sig_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SIG, .tfmsize = offsetof(struct crypto_sig, base), .algsize = offsetof(struct sig_alg, base), }; struct crypto_sig *crypto_alloc_sig(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_sig_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_sig); static int sig_default_sign(struct crypto_sig *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { return -ENOSYS; } static int sig_default_verify(struct crypto_sig *tfm, const void *src, unsigned int slen, const void *dst, unsigned int dlen) { return -ENOSYS; } static int sig_default_set_key(struct crypto_sig *tfm, const void *key, unsigned int keylen) { return -ENOSYS; } static unsigned int sig_default_size(struct crypto_sig *tfm) { return DIV_ROUND_UP_POW2(crypto_sig_keysize(tfm), BITS_PER_BYTE); } static int sig_prepare_alg(struct sig_alg *alg) { struct crypto_alg *base = &alg->base; if (!alg->sign) alg->sign = sig_default_sign; if (!alg->verify) alg->verify = sig_default_verify; if (!alg->set_priv_key) alg->set_priv_key = sig_default_set_key; if (!alg->set_pub_key) return -EINVAL; if (!alg->key_size) return -EINVAL; if (!alg->max_size) alg->max_size = sig_default_size; if (!alg->digest_size) alg->digest_size = sig_default_size; base->cra_type = &crypto_sig_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_SIG; return 0; } int crypto_register_sig(struct sig_alg *alg) { struct crypto_alg *base = &alg->base; int err; err = sig_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_sig); void crypto_unregister_sig(struct sig_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_sig); int sig_register_instance(struct crypto_template *tmpl, struct sig_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = sig_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, sig_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(sig_register_instance); int crypto_grab_sig(struct crypto_sig_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_sig_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_sig); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Public Key Signature Algorithms");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 /* SPDX-License-Identifier: GPL-2.0 */ /* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces. * * File es58x_core.h: All common definitions and declarations. * * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved. * Copyright (c) 2020 ETAS K.K.. All rights reserved. * Copyright (c) 2020-2022 Vincent Mailhol <mailhol.vincent@wanadoo.fr> */ #ifndef __ES58X_COMMON_H__ #define __ES58X_COMMON_H__ #include <linux/can.h> #include <linux/can/dev.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/usb.h> #include <net/devlink.h> #include "es581_4.h" #include "es58x_fd.h" /* Driver constants */ #define ES58X_RX_URBS_MAX 5 /* Empirical value */ #define ES58X_TX_URBS_MAX 6 /* Empirical value */ #define ES58X_MAX(param) \ (ES581_4_##param > ES58X_FD_##param ? \ ES581_4_##param : ES58X_FD_##param) #define ES58X_TX_BULK_MAX ES58X_MAX(TX_BULK_MAX) #define ES58X_RX_BULK_MAX ES58X_MAX(RX_BULK_MAX) #define ES58X_ECHO_BULK_MAX ES58X_MAX(ECHO_BULK_MAX) #define ES58X_NUM_CAN_CH_MAX ES58X_MAX(NUM_CAN_CH) /* Use this when channel index is irrelevant (e.g. device * timestamp). */ #define ES58X_CHANNEL_IDX_NA 0xFF #define ES58X_EMPTY_MSG NULL /* Threshold on consecutive CAN_STATE_ERROR_PASSIVE. If we receive * ES58X_CONSECUTIVE_ERR_PASSIVE_MAX times the event * ES58X_ERR_CRTL_PASSIVE in a row without any successful RX or TX, * we force the device to switch to CAN_STATE_BUS_OFF state. */ #define ES58X_CONSECUTIVE_ERR_PASSIVE_MAX 254 /* A magic number sent by the ES581.4 to inform it is alive. */ #define ES58X_HEARTBEAT 0x11 /** * enum es58x_driver_info - Quirks of the device. * @ES58X_DUAL_CHANNEL: Device has two CAN channels. If this flag is * not set, it is implied that the device has only one CAN * channel. * @ES58X_FD_FAMILY: Device is CAN-FD capable. If this flag is not * set, the device only supports classical CAN. */ enum es58x_driver_info { ES58X_DUAL_CHANNEL = BIT(0), ES58X_FD_FAMILY = BIT(1) }; enum es58x_echo { ES58X_ECHO_OFF = 0, ES58X_ECHO_ON = 1 }; /** * enum es58x_physical_layer - Type of the physical layer. * @ES58X_PHYSICAL_LAYER_HIGH_SPEED: High-speed CAN (c.f. ISO * 11898-2). * * Some products of the ETAS portfolio also support low-speed CAN * (c.f. ISO 11898-3). However, all the devices in scope of this * driver do not support the option, thus, the enum has only one * member. */ enum es58x_physical_layer { ES58X_PHYSICAL_LAYER_HIGH_SPEED = 1 }; enum es58x_samples_per_bit { ES58X_SAMPLES_PER_BIT_ONE = 1, ES58X_SAMPLES_PER_BIT_THREE = 2 }; /** * enum es58x_sync_edge - Synchronization method. * @ES58X_SYNC_EDGE_SINGLE: ISO CAN specification defines the use of a * single edge synchronization. The synchronization should be * done on recessive to dominant level change. * * For information, ES582.1 and ES584.1 also support a double * synchronization, requiring both recessive to dominant then dominant * to recessive level change. However, this is not supported in * SocketCAN framework, thus, the enum has only one member. */ enum es58x_sync_edge { ES58X_SYNC_EDGE_SINGLE = 1 }; /** * enum es58x_flag - CAN flags for RX/TX messages. * @ES58X_FLAG_EFF: Extended Frame Format (EFF). * @ES58X_FLAG_RTR: Remote Transmission Request (RTR). * @ES58X_FLAG_FD_BRS: Bit rate switch (BRS): second bitrate for * payload data. * @ES58X_FLAG_FD_ESI: Error State Indicator (ESI): tell if the * transmitting node is in error passive mode. * @ES58X_FLAG_FD_DATA: CAN FD frame. */ enum es58x_flag { ES58X_FLAG_EFF = BIT(0), ES58X_FLAG_RTR = BIT(1), ES58X_FLAG_FD_BRS = BIT(3), ES58X_FLAG_FD_ESI = BIT(5), ES58X_FLAG_FD_DATA = BIT(6) }; /** * enum es58x_err - CAN error detection. * @ES58X_ERR_OK: No errors. * @ES58X_ERR_PROT_STUFF: Bit stuffing error: more than 5 consecutive * equal bits. * @ES58X_ERR_PROT_FORM: Frame format error. * @ES58X_ERR_ACK: Received no ACK on transmission. * @ES58X_ERR_PROT_BIT: Single bit error. * @ES58X_ERR_PROT_CRC: Incorrect 15, 17 or 21 bits CRC. * @ES58X_ERR_PROT_BIT1: Unable to send recessive bit: tried to send * recessive bit 1 but monitored dominant bit 0. * @ES58X_ERR_PROT_BIT0: Unable to send dominant bit: tried to send * dominant bit 0 but monitored recessive bit 1. * @ES58X_ERR_PROT_OVERLOAD: Bus overload. * @ES58X_ERR_PROT_UNSPEC: Unspecified. * * Please refer to ISO 11898-1:2015, section 10.11 "Error detection" * and section 10.13 "Overload signaling" for additional details. */ enum es58x_err { ES58X_ERR_OK = 0, ES58X_ERR_PROT_STUFF = BIT(0), ES58X_ERR_PROT_FORM = BIT(1), ES58X_ERR_ACK = BIT(2), ES58X_ERR_PROT_BIT = BIT(3), ES58X_ERR_PROT_CRC = BIT(4), ES58X_ERR_PROT_BIT1 = BIT(5), ES58X_ERR_PROT_BIT0 = BIT(6), ES58X_ERR_PROT_OVERLOAD = BIT(7), ES58X_ERR_PROT_UNSPEC = BIT(31) }; /** * enum es58x_event - CAN error codes returned by the device. * @ES58X_EVENT_OK: No errors. * @ES58X_EVENT_CRTL_ACTIVE: Active state: both TR and RX error count * is less than 128. * @ES58X_EVENT_CRTL_PASSIVE: Passive state: either TX or RX error * count is greater than 127. * @ES58X_EVENT_CRTL_WARNING: Warning state: either TX or RX error * count is greater than 96. * @ES58X_EVENT_BUSOFF: Bus off. * @ES58X_EVENT_SINGLE_WIRE: Lost connection on either CAN high or CAN * low. * * Please refer to ISO 11898-1:2015, section 12.1.4 "Rules of fault * confinement" for additional details. */ enum es58x_event { ES58X_EVENT_OK = 0, ES58X_EVENT_CRTL_ACTIVE = BIT(0), ES58X_EVENT_CRTL_PASSIVE = BIT(1), ES58X_EVENT_CRTL_WARNING = BIT(2), ES58X_EVENT_BUSOFF = BIT(3), ES58X_EVENT_SINGLE_WIRE = BIT(4) }; /* enum es58x_ret_u8 - Device return error codes, 8 bit format. * * Specific to ES581.4. */ enum es58x_ret_u8 { ES58X_RET_U8_OK = 0x00, ES58X_RET_U8_ERR_UNSPECIFIED_FAILURE = 0x80, ES58X_RET_U8_ERR_NO_MEM = 0x81, ES58X_RET_U8_ERR_BAD_CRC = 0x99 }; /* enum es58x_ret_u32 - Device return error codes, 32 bit format. */ enum es58x_ret_u32 { ES58X_RET_U32_OK = 0x00000000UL, ES58X_RET_U32_ERR_UNSPECIFIED_FAILURE = 0x80000000UL, ES58X_RET_U32_ERR_NO_MEM = 0x80004001UL, ES58X_RET_U32_WARN_PARAM_ADJUSTED = 0x40004000UL, ES58X_RET_U32_WARN_TX_MAYBE_REORDER = 0x40004001UL, ES58X_RET_U32_ERR_TIMEDOUT = 0x80000008UL, ES58X_RET_U32_ERR_FIFO_FULL = 0x80003002UL, ES58X_RET_U32_ERR_BAD_CONFIG = 0x80004000UL, ES58X_RET_U32_ERR_NO_RESOURCE = 0x80004002UL }; /* enum es58x_ret_type - Type of the command returned by the ES58X * device. */ enum es58x_ret_type { ES58X_RET_TYPE_SET_BITTIMING, ES58X_RET_TYPE_ENABLE_CHANNEL, ES58X_RET_TYPE_DISABLE_CHANNEL, ES58X_RET_TYPE_TX_MSG, ES58X_RET_TYPE_RESET_RX, ES58X_RET_TYPE_RESET_TX, ES58X_RET_TYPE_DEVICE_ERR }; union es58x_urb_cmd { struct es581_4_urb_cmd es581_4_urb_cmd; struct es58x_fd_urb_cmd es58x_fd_urb_cmd; struct { /* Common header parts of all variants */ __le16 sof; u8 cmd_type; u8 cmd_id; } __packed; DECLARE_FLEX_ARRAY(u8, raw_cmd); }; /** * struct es58x_priv - All information specific to a CAN channel. * @can: struct can_priv must be the first member (Socket CAN relies * on the fact that function netdev_priv() returns a pointer to * a struct can_priv). * @devlink_port: devlink instance for the network interface. * @es58x_dev: pointer to the corresponding ES58X device. * @tx_urb: Used as a buffer to concatenate the TX messages and to do * a bulk send. Please refer to es58x_start_xmit() for more * details. * @tx_tail: Index of the oldest packet still pending for * completion. @tx_tail & echo_skb_mask represents the beginning * of the echo skb FIFO, i.e. index of the first element. * @tx_head: Index of the next packet to be sent to the * device. @tx_head & echo_skb_mask represents the end of the * echo skb FIFO plus one, i.e. the first free index. * @tx_can_msg_cnt: Number of messages in @tx_urb. * @tx_can_msg_is_fd: false: all messages in @tx_urb are Classical * CAN, true: all messages in @tx_urb are CAN FD. Rationale: * ES58X FD devices do not allow to mix Classical CAN and FD CAN * frames in one single bulk transmission. * @err_passive_before_rtx_success: The ES58X device might enter in a * state in which it keeps alternating between error passive * and active states. This counter keeps track of the number of * error passive and if it gets bigger than * ES58X_CONSECUTIVE_ERR_PASSIVE_MAX, es58x_rx_err_msg() will * force the status to bus-off. * @channel_idx: Channel index, starts at zero. */ struct es58x_priv { struct can_priv can; struct devlink_port devlink_port; struct es58x_device *es58x_dev; struct urb *tx_urb; u32 tx_tail; u32 tx_head; u8 tx_can_msg_cnt; bool tx_can_msg_is_fd; u8 err_passive_before_rtx_success; u8 channel_idx; }; /** * struct es58x_parameters - Constant parameters of a given hardware * variant. * @bittiming_const: Nominal bittimming constant parameters. * @data_bittiming_const: Data bittiming constant parameters. * @tdc_const: Transmission Delay Compensation constant parameters. * @bitrate_max: Maximum bitrate supported by the device. * @clock: CAN clock parameters. * @ctrlmode_supported: List of supported modes. Please refer to * can/netlink.h file for additional details. * @tx_start_of_frame: Magic number at the beginning of each TX URB * command. * @rx_start_of_frame: Magic number at the beginning of each RX URB * command. * @tx_urb_cmd_max_len: Maximum length of a TX URB command. * @rx_urb_cmd_max_len: Maximum length of a RX URB command. * @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head * field of the struct es58x_priv into echo_skb * indexes. Properties: @fifo_mask = echo_skb_max - 1 where * echo_skb_max must be a power of two. Also, echo_skb_max must * not exceed the maximum size of the device internal TX FIFO * length. This parameter is used to control the network queue * wake/stop logic. * @dql_min_limit: Dynamic Queue Limits (DQL) absolute minimum limit * of bytes allowed to be queued on this network device transmit * queue. Used by the Byte Queue Limits (BQL) to determine how * frequently the xmit_more flag will be set to true in * es58x_start_xmit(). Set this value higher to optimize for * throughput but be aware that it might have a negative impact * on the latency! This value can also be set dynamically. Please * refer to Documentation/ABI/testing/sysfs-class-net-queues for * more details. * @tx_bulk_max: Maximum number of TX messages that can be sent in one * single URB packet. * @urb_cmd_header_len: Length of the URB command header. * @rx_urb_max: Number of RX URB to be allocated during device probe. * @tx_urb_max: Number of TX URB to be allocated during device probe. */ struct es58x_parameters { const struct can_bittiming_const *bittiming_const; const struct can_bittiming_const *data_bittiming_const; const struct can_tdc_const *tdc_const; u32 bitrate_max; struct can_clock clock; u32 ctrlmode_supported; u16 tx_start_of_frame; u16 rx_start_of_frame; u16 tx_urb_cmd_max_len; u16 rx_urb_cmd_max_len; u16 fifo_mask; u16 dql_min_limit; u8 tx_bulk_max; u8 urb_cmd_header_len; u8 rx_urb_max; u8 tx_urb_max; }; /** * struct es58x_operators - Function pointers used to encode/decode * the TX/RX messages. * @get_msg_len: Get field msg_len of the urb_cmd. The offset of * msg_len inside urb_cmd depends of the device model. * @handle_urb_cmd: Decode the URB command received from the device * and dispatch it to the relevant sub function. * @fill_urb_header: Fill the header of urb_cmd. * @tx_can_msg: Encode a TX CAN message and add it to the bulk buffer * cmd_buf of es58x_dev. * @enable_channel: Start the CAN channel. * @disable_channel: Stop the CAN channel. * @reset_device: Full reset of the device. N.B: this feature is only * present on the ES581.4. For ES58X FD devices, this field is * set to NULL. * @get_timestamp: Request a timestamp from the ES58X device. */ struct es58x_operators { u16 (*get_msg_len)(const union es58x_urb_cmd *urb_cmd); int (*handle_urb_cmd)(struct es58x_device *es58x_dev, const union es58x_urb_cmd *urb_cmd); void (*fill_urb_header)(union es58x_urb_cmd *urb_cmd, u8 cmd_type, u8 cmd_id, u8 channel_idx, u16 cmd_len); int (*tx_can_msg)(struct es58x_priv *priv, const struct sk_buff *skb); int (*enable_channel)(struct es58x_priv *priv); int (*disable_channel)(struct es58x_priv *priv); int (*reset_device)(struct es58x_device *es58x_dev); int (*get_timestamp)(struct es58x_device *es58x_dev); }; /** * struct es58x_sw_version - Version number of the firmware or the * bootloader. * @major: Version major number, represented on two digits. * @minor: Version minor number, represented on two digits. * @revision: Version revision number, represented on two digits. * * The firmware and the bootloader share the same format: "xx.xx.xx" * where 'x' is a digit. Both can be retrieved from the product * information string. */ struct es58x_sw_version { u8 major; u8 minor; u8 revision; }; /** * struct es58x_hw_revision - Hardware revision number. * @letter: Revision letter, an alphanumeric character. * @major: Version major number, represented on three digits. * @minor: Version minor number, represented on three digits. * * The hardware revision uses its own format: "axxx/xxx" where 'a' is * an alphanumeric character and 'x' a digit. It can be retrieved from * the product information string. */ struct es58x_hw_revision { char letter; u16 major; u16 minor; }; /** * struct es58x_device - All information specific to an ES58X device. * @dev: Device information. * @udev: USB device information. * @netdev: Array of our CAN channels. * @param: The constant parameters. * @ops: Operators. * @rx_pipe: USB reception pipe. * @tx_pipe: USB transmission pipe. * @rx_urbs: Anchor for received URBs. * @tx_urbs_busy: Anchor for TX URBs which were send to the device. * @tx_urbs_idle: Anchor for TX USB which are idle. This driver * allocates the memory for the URBs during the probe. When a TX * URB is needed, it can be taken from this anchor. The network * queue wake/stop logic should prevent this URB from getting * empty. Please refer to es58x_get_tx_urb() for more details. * @tx_urbs_idle_cnt: number of urbs in @tx_urbs_idle. * @firmware_version: The firmware version number. * @bootloader_version: The bootloader version number. * @hardware_revision: The hardware revision number. * @ktime_req_ns: kernel timestamp when es58x_set_realtime_diff_ns() * was called. * @realtime_diff_ns: difference in nanoseconds between the clocks of * the ES58X device and the kernel. * @timestamps: a temporary buffer to store the time stamps before * feeding them to es58x_can_get_echo_skb(). Can only be used * in RX branches. * @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev). * @opened_channel_cnt: number of channels opened. Free of race * conditions because its two users (net_device_ops:ndo_open() * and net_device_ops:ndo_close()) guarantee that the network * stack big kernel lock (a.k.a. rtnl_mutex) is being hold. * @rx_cmd_buf_len: Length of @rx_cmd_buf. * @rx_cmd_buf: The device might split the URB commands in an * arbitrary amount of pieces. This buffer is used to concatenate * all those pieces. Can only be used in RX branches. This field * has to be the last one of the structure because it is has a * flexible size (c.f. es58x_sizeof_es58x_device() function). */ struct es58x_device { struct device *dev; struct usb_device *udev; struct net_device *netdev[ES58X_NUM_CAN_CH_MAX]; const struct es58x_parameters *param; const struct es58x_operators *ops; unsigned int rx_pipe; unsigned int tx_pipe; struct usb_anchor rx_urbs; struct usb_anchor tx_urbs_busy; struct usb_anchor tx_urbs_idle; atomic_t tx_urbs_idle_cnt; struct es58x_sw_version firmware_version; struct es58x_sw_version bootloader_version; struct es58x_hw_revision hardware_revision; u64 ktime_req_ns; s64 realtime_diff_ns; u64 timestamps[ES58X_ECHO_BULK_MAX]; u8 num_can_ch; u8 opened_channel_cnt; u16 rx_cmd_buf_len; union es58x_urb_cmd rx_cmd_buf; }; /** * es58x_sizeof_es58x_device() - Calculate the maximum length of * struct es58x_device. * @es58x_dev_param: The constant parameters of the device. * * The length of struct es58x_device depends on the length of its last * field: rx_cmd_buf. This macro allows to optimize the memory * allocation. * * Return: length of struct es58x_device. */ static inline size_t es58x_sizeof_es58x_device(const struct es58x_parameters *es58x_dev_param) { return offsetof(struct es58x_device, rx_cmd_buf) + es58x_dev_param->rx_urb_cmd_max_len; } static inline int __es58x_check_msg_len(const struct device *dev, const char *stringified_msg, size_t actual_len, size_t expected_len) { if (expected_len != actual_len) { dev_err(dev, "Length of %s is %zu but received command is %zu.\n", stringified_msg, expected_len, actual_len); return -EMSGSIZE; } return 0; } /** * es58x_check_msg_len() - Check the size of a received message. * @dev: Device, used to print error messages. * @msg: Received message, must not be a pointer. * @actual_len: Length of the message as advertised in the command header. * * Must be a macro in order to accept the different types of messages * as an input. Can be use with any of the messages which have a fixed * length. Check for an exact match of the size. * * Return: zero on success, -EMSGSIZE if @actual_len differs from the * expected length. */ #define es58x_check_msg_len(dev, msg, actual_len) \ __es58x_check_msg_len(dev, __stringify(msg), \ actual_len, sizeof(msg)) static inline int __es58x_check_msg_max_len(const struct device *dev, const char *stringified_msg, size_t actual_len, size_t expected_len) { if (actual_len > expected_len) { dev_err(dev, "Maximum length for %s is %zu but received command is %zu.\n", stringified_msg, expected_len, actual_len); return -EOVERFLOW; } return 0; } /** * es58x_check_msg_max_len() - Check the maximum size of a received message. * @dev: Device, used to print error messages. * @msg: Received message, must not be a pointer. * @actual_len: Length of the message as advertised in the command header. * * Must be a macro in order to accept the different types of messages * as an input. To be used with the messages of variable sizes. Only * check that the message is not bigger than the maximum expected * size. * * Return: zero on success, -EOVERFLOW if @actual_len is greater than * the expected length. */ #define es58x_check_msg_max_len(dev, msg, actual_len) \ __es58x_check_msg_max_len(dev, __stringify(msg), \ actual_len, sizeof(msg)) static inline int __es58x_msg_num_element(const struct device *dev, const char *stringified_msg, size_t actual_len, size_t msg_len, size_t elem_len) { size_t actual_num_elem = actual_len / elem_len; size_t expected_num_elem = msg_len / elem_len; if (actual_num_elem == 0) { dev_err(dev, "Minimum length for %s is %zu but received command is %zu.\n", stringified_msg, elem_len, actual_len); return -EMSGSIZE; } else if ((actual_len % elem_len) != 0) { dev_err(dev, "Received command length: %zu is not a multiple of %s[0]: %zu\n", actual_len, stringified_msg, elem_len); return -EMSGSIZE; } else if (actual_num_elem > expected_num_elem) { dev_err(dev, "Array %s is supposed to have %zu elements each of size %zu...\n", stringified_msg, expected_num_elem, elem_len); dev_err(dev, "... But received command has %zu elements (total length %zu).\n", actual_num_elem, actual_len); return -EOVERFLOW; } return actual_num_elem; } /** * es58x_msg_num_element() - Check size and give the number of * elements in a message of array type. * @dev: Device, used to print error messages. * @msg: Received message, must be an array. * @actual_len: Length of the message as advertised in the command * header. * * Must be a macro in order to accept the different types of messages * as an input. To be used on message of array type. Array's element * has to be of fixed size (else use es58x_check_msg_max_len()). Check * that the total length is an exact multiple of the length of a * single element. * * Return: number of elements in the array on success, -EOVERFLOW if * @actual_len is greater than the expected length, -EMSGSIZE if * @actual_len is not a multiple of a single element. */ #define es58x_msg_num_element(dev, msg, actual_len) \ ({ \ size_t __elem_len = sizeof((msg)[0]) + __must_be_array(msg); \ __es58x_msg_num_element(dev, __stringify(msg), actual_len, \ sizeof(msg), __elem_len); \ }) /** * es58x_priv() - Get the priv member and cast it to struct es58x_priv. * @netdev: CAN network device. * * Return: ES58X device. */ static inline struct es58x_priv *es58x_priv(struct net_device *netdev) { return (struct es58x_priv *)netdev_priv(netdev); } /** * ES58X_SIZEOF_URB_CMD() - Calculate the maximum length of an urb * command for a given message field name. * @es58x_urb_cmd_type: type (either "struct es581_4_urb_cmd" or * "struct es58x_fd_urb_cmd"). * @msg_field: name of the message field. * * Must be a macro in order to accept the different command types as * an input. * * Return: length of the urb command. */ #define ES58X_SIZEOF_URB_CMD(es58x_urb_cmd_type, msg_field) \ (offsetof(es58x_urb_cmd_type, raw_msg) \ + sizeof_field(es58x_urb_cmd_type, msg_field) \ + sizeof_field(es58x_urb_cmd_type, \ reserved_for_crc16_do_not_use)) /** * es58x_get_urb_cmd_len() - Calculate the actual length of an urb * command for a given message length. * @es58x_dev: ES58X device. * @msg_len: Length of the message. * * Add the header and CRC lengths to the message length. * * Return: length of the urb command. */ static inline size_t es58x_get_urb_cmd_len(struct es58x_device *es58x_dev, u16 msg_len) { return es58x_dev->param->urb_cmd_header_len + msg_len + sizeof(u16); } /** * es58x_get_netdev() - Get the network device. * @es58x_dev: ES58X device. * @channel_no: The channel number as advertised in the urb command. * @channel_idx_offset: Some of the ES58x starts channel numbering * from 0 (ES58X FD), others from 1 (ES581.4). * @netdev: CAN network device. * * Do a sanity check on the index provided by the device. * * Return: zero on success, -ECHRNG if the received channel number is * out of range and -ENODEV if the network device is not yet * configured. */ static inline int es58x_get_netdev(struct es58x_device *es58x_dev, int channel_no, int channel_idx_offset, struct net_device **netdev) { int channel_idx = channel_no - channel_idx_offset; *netdev = NULL; if (channel_idx < 0 || channel_idx >= es58x_dev->num_can_ch) return -ECHRNG; *netdev = es58x_dev->netdev[channel_idx]; if (!*netdev || !netif_device_present(*netdev)) return -ENODEV; return 0; } /** * es58x_get_raw_can_id() - Get the CAN ID. * @cf: CAN frame. * * Mask the CAN ID in order to only keep the significant bits. * * Return: the raw value of the CAN ID. */ static inline int es58x_get_raw_can_id(const struct can_frame *cf) { if (cf->can_id & CAN_EFF_FLAG) return cf->can_id & CAN_EFF_MASK; else return cf->can_id & CAN_SFF_MASK; } /** * es58x_get_flags() - Get the CAN flags. * @skb: socket buffer of a CAN message. * * Return: the CAN flag as an enum es58x_flag. */ static inline enum es58x_flag es58x_get_flags(const struct sk_buff *skb) { struct canfd_frame *cf = (struct canfd_frame *)skb->data; enum es58x_flag es58x_flags = 0; if (cf->can_id & CAN_EFF_FLAG) es58x_flags |= ES58X_FLAG_EFF; if (can_is_canfd_skb(skb)) { es58x_flags |= ES58X_FLAG_FD_DATA; if (cf->flags & CANFD_BRS) es58x_flags |= ES58X_FLAG_FD_BRS; if (cf->flags & CANFD_ESI) es58x_flags |= ES58X_FLAG_FD_ESI; } else if (cf->can_id & CAN_RTR_FLAG) /* Remote frames are only defined in Classical CAN frames */ es58x_flags |= ES58X_FLAG_RTR; return es58x_flags; } /* es58x_core.c. */ int es58x_can_get_echo_skb(struct net_device *netdev, u32 packet_idx, u64 *tstamps, unsigned int pkts); int es58x_tx_ack_msg(struct net_device *netdev, u16 tx_free_entries, enum es58x_ret_u32 rx_cmd_ret_u32); int es58x_rx_can_msg(struct net_device *netdev, u64 timestamp, const u8 *data, canid_t can_id, enum es58x_flag es58x_flags, u8 dlc); int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, enum es58x_event event, u64 timestamp); void es58x_rx_timestamp(struct es58x_device *es58x_dev, u64 timestamp); int es58x_rx_cmd_ret_u8(struct device *dev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u8 rx_cmd_ret_u8); int es58x_rx_cmd_ret_u32(struct net_device *netdev, enum es58x_ret_type cmd_ret_type, enum es58x_ret_u32 rx_cmd_ret_u32); int es58x_send_msg(struct es58x_device *es58x_dev, u8 cmd_type, u8 cmd_id, const void *msg, u16 cmd_len, int channel_idx); /* es58x_devlink.c. */ void es58x_parse_product_info(struct es58x_device *es58x_dev); extern const struct devlink_ops es58x_dl_ops; /* es581_4.c. */ extern const struct es58x_parameters es581_4_param; extern const struct es58x_operators es581_4_ops; /* es58x_fd.c. */ extern const struct es58x_parameters es58x_fd_param; extern const struct es58x_operators es58x_fd_ops; #endif /* __ES58X_COMMON_H__ */
67 26 186 100 1 1 1 1 1 2 2 1 9 2 2 12 12 1 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> #include <net/checksum.h> enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1, INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3, }; extern int sysctl_tunnel_ecn_log; static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; } static inline int INET_ECN_is_not_ect(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; } static inline int INET_ECN_is_capable(__u8 dsfield) { return dsfield & INET_ECN_ECT_0; } /* * RFC 3168 9.1.1 * The full-functionality option for ECN encapsulation is to copy the * ECN codepoint of the inside header to the outside header on * encapsulation if the inside header is not-ECT or ECT, and to set the * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of * the inside header is CE. */ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) { outer &= ~INET_ECN_MASK; outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : INET_ECN_ECT_0; return outer; } static inline void INET_ECN_xmit(struct sock *sk) { inet_sk(sk)->tos |= INET_ECN_ECT_0; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } static inline void INET_ECN_dontxmit(struct sock *sk) { inet_sk(sk)->tos &= ~INET_ECN_MASK; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass &= ~INET_ECN_MASK; } #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ } while (0) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 ecn = (iph->tos + 1) & INET_ECN_MASK; __be16 check_add; /* * After the last operation we have (in binary): * INET_ECN_NOT_ECT => 01 * INET_ECN_ECT_1 => 10 * INET_ECN_ECT_0 => 11 * INET_ECN_CE => 00 */ if (!(ecn & 2)) return !ecn; /* * The following gives us: * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ check_add = (__force __be16)((__force u16)htons(0xFFFB) + (__force u16)htons(ecn)); iph->check = csum16_add(iph->check, check_add); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; iph->check = csum16_add(iph->check, htons(0x1)); iph->tos ^= INET_ECN_MASK; return 1; } static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; } static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) { dscp &= ~INET_ECN_MASK; ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); } struct ipv6hdr; /* Note: * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE * In IPv6 case, no checksum compensates the change in IPv6 header, * so we have to update skb->csum. */ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; from = *(__be32 *)iph; to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; from = *(__be32 *)iph; to = from ^ htonl(INET_ECN_MASK << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ce(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } return 0; } static inline int skb_get_dsfield(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) break; return ipv4_get_dsfield(ip_hdr(skb)); case cpu_to_be16(ETH_P_IPV6): if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) break; return ipv6_get_dsfield(ipv6_hdr(skb)); } return -1; } static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ect1(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); break; } return 0; } /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer * header (column) in Figure 4 * * +---------+------------------------------------------------+ * |Arriving | Arriving Outer Header | * | Inner +---------+------------+------------+------------+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE | * +---------+---------+------------+------------+------------+ * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | * | CE | CE | CE | CE(!!!)| CE | * +---------+---------+------------+------------+------------+ * * Figure 4: New IP in IP Decapsulation Behaviour * * returns 0 on success * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { case INET_ECN_NOT_ECT: return 0; case INET_ECN_ECT_0: case INET_ECN_ECT_1: return 1; case INET_ECN_CE: return 2; } } *set_ce = INET_ECN_is_ce(outer); return 0; } static inline int INET_ECN_decapsulate(struct sk_buff *skb, __u8 outer, __u8 inner) { bool set_ce = false; int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); if (!rc) { if (set_ce) INET_ECN_set_ce(skb); else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) INET_ECN_set_ect1(skb); } return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, oiph->tos, inner); } static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } #endif
10 1 1 1 2 1 4 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/unaligned.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> struct nft_byteorder { u8 sreg; u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; }; void nft_byteorder_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = &regs->data[priv->sreg]; u32 *dst = &regs->data[priv->dreg]; u16 *s16, *d16; unsigned int i; s16 = (void *)src; d16 = (void *)dst; switch (priv->size) { case 8: { u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); nft_reg_store64(&dst64[i], src64); } break; } break; } case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) dst[i] = (__force __u32)htonl(src[i]); break; } break; case 2: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) d16[i] = (__force __u16)htons(s16[i]); break; } break; } } static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || tb[NFTA_BYTEORDER_DREG] == NULL || tb[NFTA_BYTEORDER_LEN] == NULL || tb[NFTA_BYTEORDER_SIZE] == NULL || tb[NFTA_BYTEORDER_OP] == NULL) return -EINVAL; priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); switch (priv->op) { case NFT_BYTEORDER_NTOH: case NFT_BYTEORDER_HTON: break; default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); if (err < 0) return err; priv->size = size; switch (priv->size) { case 2: case 4: case 8: break; default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg, priv->len); if (err < 0) return err; return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_byteorder *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static bool nft_byteorder_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { struct nft_byteorder *priv = nft_expr_priv(expr); nft_reg_track_cancel(track, priv->dreg, priv->len); return false; } static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, };
8 8 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/spinlock.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> static LIST_HEAD(ax25_dev_list); DEFINE_SPINLOCK(ax25_dev_lock); ax25_dev *ax25_addr_ax25dev(ax25_address *addr) { ax25_dev *ax25_dev, *res = NULL; spin_lock_bh(&ax25_dev_lock); list_for_each_entry(ax25_dev, &ax25_dev_list, list) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); break; } spin_unlock_bh(&ax25_dev_lock); return res; } /* * This is called when an interface is brought up. These are * reasonable defaults. */ void ax25_dev_device_up(struct net_device *dev) { ax25_dev *ax25_dev; ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_KERNEL); if (!ax25_dev) { printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); return; } refcount_set(&ax25_dev->refcount, 1); ax25_dev->dev = dev; netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; ax25_dev->device_up = true; ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE; ax25_dev->values[AX25_VALUES_BACKOFF] = AX25_DEF_BACKOFF; ax25_dev->values[AX25_VALUES_CONMODE] = AX25_DEF_CONMODE; ax25_dev->values[AX25_VALUES_WINDOW] = AX25_DEF_WINDOW; ax25_dev->values[AX25_VALUES_EWINDOW] = AX25_DEF_EWINDOW; ax25_dev->values[AX25_VALUES_T1] = AX25_DEF_T1; ax25_dev->values[AX25_VALUES_T2] = AX25_DEF_T2; ax25_dev->values[AX25_VALUES_T3] = AX25_DEF_T3; ax25_dev->values[AX25_VALUES_IDLE] = AX25_DEF_IDLE; ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2; ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN; ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL; #ifdef CONFIG_AX25_DAMA_SLAVE ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT; #endif #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_ds_setup_timer(ax25_dev); #endif spin_lock_bh(&ax25_dev_lock); list_add(&ax25_dev->list, &ax25_dev_list); rcu_assign_pointer(dev->ax25_ptr, ax25_dev); spin_unlock_bh(&ax25_dev_lock); ax25_register_dev_sysctl(ax25_dev); } void ax25_dev_device_down(struct net_device *dev) { ax25_dev *s, *ax25_dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; ax25_unregister_dev_sysctl(ax25_dev); spin_lock_bh(&ax25_dev_lock); #ifdef CONFIG_AX25_DAMA_SLAVE timer_shutdown_sync(&ax25_dev->dama.slave_timer); #endif /* * Remove any packet forwarding that points to this device. */ list_for_each_entry(s, &ax25_dev_list, list) if (s->forward == dev) s->forward = NULL; list_for_each_entry(s, &ax25_dev_list, list) { if (s == ax25_dev) { list_del(&s->list); break; } } RCU_INIT_POINTER(dev->ax25_ptr, NULL); spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) { ax25_dev *ax25_dev, *fwd_dev; if ((ax25_dev = ax25_addr_ax25dev(&fwd->port_from)) == NULL) return -EINVAL; switch (cmd) { case SIOCAX25ADDFWD: fwd_dev = ax25_addr_ax25dev(&fwd->port_to); if (!fwd_dev) { ax25_dev_put(ax25_dev); return -EINVAL; } if (ax25_dev->forward) { ax25_dev_put(fwd_dev); ax25_dev_put(ax25_dev); return -EINVAL; } ax25_dev->forward = fwd_dev->dev; ax25_dev_put(fwd_dev); ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: if (!ax25_dev->forward) { ax25_dev_put(ax25_dev); return -EINVAL; } ax25_dev->forward = NULL; ax25_dev_put(ax25_dev); break; default: ax25_dev_put(ax25_dev); return -EINVAL; } return 0; } struct net_device *ax25_fwd_dev(struct net_device *dev) { ax25_dev *ax25_dev; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return dev; if (ax25_dev->forward == NULL) return dev; return ax25_dev->forward; } /* * Free all memory associated with device structures. */ void __exit ax25_dev_free(void) { ax25_dev *s, *n; spin_lock_bh(&ax25_dev_lock); list_for_each_entry_safe(s, n, &ax25_dev_list, list) { netdev_put(s->dev, &s->dev_tracker); list_del(&s->list); ax25_dev_put(s); } spin_unlock_bh(&ax25_dev_lock); }
1 1 1 9 17 5 11 3 14 1 1 4 6 6 5 1 2 1 5 50 7 50 21 21 21 4 17 21 34 34 43 45 14 12 14 7 7 7 7 13 6 6 2 2 2 26 1 3 7 5 8 7 12 12 7 48 2 6 1 6 1 5 1 1 7 6 1 6 6 6 1 8 1 7 7 6 1 1 1 1 1 12 12 11 1 12 15 15 1 15 12 12 11 1 12 1 12 1 12 11 1 12 12 7 3 20 1 2 7 1 10 8 10 8 5 5 7 1 1 1 1 1 5 5 18 5 17 17 14 14 12 13 9 2 9 7 7 4 4 1 1 7 6 17 1 1 7 5 1 4 1 8 17 21 1 4 17 1 17 17 4 4 7 2 1 4 5 1 1 1 2 1 1 1 1 3 1 1 13 14 14 5 2 16 7 22 22 18 21 4 17 1 1 1 7 6 1 2 3 1 1 1 1 1 1 1 2 2 3 6 34 2 9 9 1 1 1 1 1 2 4 1 1 1 2 1 1 2 1 1 1 2 6 1 1 3 2 2 2 5 5 60 6 1 1 1 1 1 1 34 10 1 2 2 1 1 1 1 2 2 1 13 13 13 13 13 19 18 14 3 9 9 9 3 7 6 8 6 3 1 1 11 9 12 2 9 1 8 7 1 1 2 6 6 1 5 5 1 5 5 5 5 5 5 5 1 5 5 5 12 12 1 14 10 14 2 3 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2009 Red Hat, Inc. * Copyright (C) 2006 Rusty Russell IBM Corporation * * Author: Michael S. Tsirkin <mst@redhat.com> * * Inspiration, some code, and most witty comments come from * Documentation/virtual/lguest/lguest.c, by Rusty Russell * * Generic code for virtio server in host kernel. */ #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/file.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/kthread.h> #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/vhost_task.h> #include <linux/interval_tree_generic.h> #include <linux/nospec.h> #include <linux/kcov.h> #include "vhost.h" static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); static int max_iotlb_entries = 2048; module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. (default: 2048)"); static bool fork_from_owner_default = VHOST_FORK_OWNER_TASK; #ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL module_param(fork_from_owner_default, bool, 0444); MODULE_PARM_DESC(fork_from_owner_default, "Set task mode as the default(default: Y)"); #endif enum { VHOST_MEMORY_F_LOG = 0x1, }; #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) { vq->user_be = true; } static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) { vq->user_be = false; } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; if (vq->private_data) return -EBUSY; if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; if (s.num != VHOST_VRING_LITTLE_ENDIAN && s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; if (s.num == VHOST_VRING_BIG_ENDIAN) vhost_enable_cross_endian_big(vq); else vhost_enable_cross_endian_little(vq); return 0; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { struct vhost_vring_state s = { .index = idx, .num = vq->user_be }; if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { /* Note for legacy virtio: user_be is initialized at reset time * according to the host endianness. If userspace does not set an * explicit endianness, the default behavior is native endian, as * expected by legacy virtio. */ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { return -ENOIOCTLCMD; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { return -ENOIOCTLCMD; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { vhost_init_is_le(vq); } struct vhost_flush_struct { struct vhost_work work; struct completion wait_event; }; static void vhost_flush_work(struct vhost_work *work) { struct vhost_flush_struct *s; s = container_of(work, struct vhost_flush_struct, work); complete(&s->wait_event); } static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct vhost_poll *poll; poll = container_of(pt, struct vhost_poll, table); poll->wqh = wqh; add_wait_queue(wqh, &poll->wait); } static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; if (!poll->dev->use_worker) work->fn(work); else vhost_poll_queue(poll); return 0; } void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; } EXPORT_SYMBOL_GPL(vhost_work_init); /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev, struct vhost_virtqueue *vq) { init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; poll->wqh = NULL; poll->vq = vq; vhost_work_init(&poll->work, fn); } EXPORT_SYMBOL_GPL(vhost_poll_init); /* Start polling a file. We add ourselves to file's wait queue. The caller must * keep a reference to a file until after vhost_poll_stop is called. */ int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; if (poll->wqh) return 0; mask = vfs_poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); return -EINVAL; } return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); /* Stop polling a file. After this function returns, it becomes safe to drop the * file reference. You must also flush afterwards. */ void vhost_poll_stop(struct vhost_poll *poll) { if (poll->wqh) { remove_wait_queue(poll->wqh, &poll->wait); poll->wqh = NULL; } } EXPORT_SYMBOL_GPL(vhost_poll_stop); static void vhost_worker_queue(struct vhost_worker *worker, struct vhost_work *work) { if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { /* We can only add the work to the list after we're * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &worker->work_list); worker->ops->wakeup(worker); } } bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) { struct vhost_worker *worker; bool queued = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker) { queued = true; vhost_worker_queue(worker, work); } rcu_read_unlock(); return queued; } EXPORT_SYMBOL_GPL(vhost_vq_work_queue); /** * __vhost_worker_flush - flush a worker * @worker: worker to flush * * The worker's flush_mutex must be held. */ static void __vhost_worker_flush(struct vhost_worker *worker) { struct vhost_flush_struct flush; if (!worker->attachment_cnt || worker->killed) return; init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); vhost_worker_queue(worker, &flush.work); /* * Drop mutex in case our worker is killed and it needs to take the * mutex to force cleanup. */ mutex_unlock(&worker->mutex); wait_for_completion(&flush.wait_event); mutex_lock(&worker->mutex); } static void vhost_worker_flush(struct vhost_worker *worker) { mutex_lock(&worker->mutex); __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); } void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_worker *worker; unsigned long i; xa_for_each(&dev->worker_xa, i, worker) vhost_worker_flush(worker); } EXPORT_SYMBOL_GPL(vhost_dev_flush); /* A lockless hint for busy polling code to exit the loop */ bool vhost_vq_has_work(struct vhost_virtqueue *vq) { struct vhost_worker *worker; bool has_work = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker && !llist_empty(&worker->work_list)) has_work = true; rcu_read_unlock(); return has_work; } EXPORT_SYMBOL_GPL(vhost_vq_has_work); void vhost_poll_queue(struct vhost_poll *poll) { vhost_vq_work_queue(poll->vq, &poll->work); } EXPORT_SYMBOL_GPL(vhost_poll_queue); static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) { int j; for (j = 0; j < VHOST_NUM_ADDRS; j++) vq->meta_iotlb[j] = NULL; } static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; for (i = 0; i < d->nvqs; ++i) __vhost_vq_meta_reset(d->vqs[i]); } static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) { call_ctx->ctx = NULL; memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); } bool vhost_vq_is_setup(struct vhost_virtqueue *vq) { return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); } EXPORT_SYMBOL_GPL(vhost_vq_is_setup); static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { vq->num = 1; vq->desc = NULL; vq->avail = NULL; vq->used = NULL; vq->last_avail_idx = 0; vq->next_avail_head = 0; vq->avail_idx = 0; vq->last_used_idx = 0; vq->signalled_used = 0; vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; vq->private_data = NULL; virtio_features_zero(vq->acked_features_array); vq->acked_backend_features = 0; vq->log_base = NULL; vq->error_ctx = NULL; vq->kick = NULL; vq->log_ctx = NULL; vhost_disable_cross_endian(vq); vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; rcu_assign_pointer(vq->worker, NULL); vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } static int vhost_run_work_kthread_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct vhost_dev *dev = worker->dev; struct llist_node *node; kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); break; } node = llist_del_all(&worker->work_list); if (!node) schedule(); node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); cond_resched(); } } kthread_unuse_mm(dev->mm); return 0; } static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct llist_node *node; node = llist_del_all(&worker->work_list); if (node) { __set_current_state(TASK_RUNNING); node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); cond_resched(); } } return !!node; } static void vhost_worker_killed(void *data) { struct vhost_worker *worker = data; struct vhost_dev *dev = worker->dev; struct vhost_virtqueue *vq; int i, attach_cnt = 0; mutex_lock(&worker->mutex); worker->killed = true; for (i = 0; i < dev->nvqs; i++) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); if (worker == rcu_dereference_check(vq->worker, lockdep_is_held(&vq->mutex))) { rcu_assign_pointer(vq->worker, NULL); attach_cnt++; } mutex_unlock(&vq->mutex); } worker->attachment_cnt -= attach_cnt; if (attach_cnt) synchronize_rcu(); /* * Finish vhost_worker_flush calls and any other works that snuck in * before the synchronize_rcu. */ vhost_run_work_list(worker); mutex_unlock(&worker->mutex); } static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->indirect); vq->indirect = NULL; kfree(vq->log); vq->log = NULL; kfree(vq->heads); vq->heads = NULL; kfree(vq->nheads); vq->nheads = NULL; } /* Helper to allocate iovec buffers for all vqs. */ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { struct vhost_virtqueue *vq; int i; for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->indirect = kmalloc_array(UIO_MAXIOV, sizeof(*vq->indirect), GFP_KERNEL); vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), GFP_KERNEL); vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), GFP_KERNEL); vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads), GFP_KERNEL); if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads) goto err_nomem; } return 0; err_nomem: for (; i >= 0; --i) vhost_vq_free_iovecs(dev->vqs[i]); return -ENOMEM; } static void vhost_dev_free_iovecs(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) vhost_vq_free_iovecs(dev->vqs[i]); } bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len) { struct vhost_dev *dev = vq->dev; if ((dev->byte_weight && total_len >= dev->byte_weight) || pkts >= dev->weight) { vhost_poll_queue(&vq->poll); return true; } return false; } EXPORT_SYMBOL_GPL(vhost_exceeds_weight); static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, unsigned int num) { size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return size_add(struct_size(vq->avail, ring, num), event); } static size_t vhost_get_used_size(struct vhost_virtqueue *vq, unsigned int num) { size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return size_add(struct_size(vq->used, ring, num), event); } static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, unsigned int num) { return sizeof(*vq->desc) * num; } void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, bool use_worker, int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg)) { struct vhost_virtqueue *vq; int i; dev->vqs = vqs; dev->nvqs = nvqs; mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; dev->use_worker = use_worker; dev->msg_handler = msg_handler; dev->fork_owner = fork_from_owner_default; init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; vq->nheads = NULL; vq->dev = dev; mutex_init(&vq->mutex); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, EPOLLIN, dev, vq); } } EXPORT_SYMBOL_GPL(vhost_dev_init); /* Caller should have device mutex */ long vhost_dev_check_owner(struct vhost_dev *dev) { /* Are you the owner? If not, I don't think you mean to do that */ return dev->mm == current->mm ? 0 : -EPERM; } EXPORT_SYMBOL_GPL(vhost_dev_check_owner); struct vhost_attach_cgroups_struct { struct vhost_work work; struct task_struct *owner; int ret; }; static void vhost_attach_cgroups_work(struct vhost_work *work) { struct vhost_attach_cgroups_struct *s; s = container_of(work, struct vhost_attach_cgroups_struct, work); s->ret = cgroup_attach_task_all(s->owner, current); } static int vhost_attach_task_to_cgroups(struct vhost_worker *worker) { struct vhost_attach_cgroups_struct attach; int saved_cnt; attach.owner = current; vhost_work_init(&attach.work, vhost_attach_cgroups_work); vhost_worker_queue(worker, &attach.work); mutex_lock(&worker->mutex); /* * Bypass attachment_cnt check in __vhost_worker_flush: * Temporarily change it to INT_MAX to bypass the check */ saved_cnt = worker->attachment_cnt; worker->attachment_cnt = INT_MAX; __vhost_worker_flush(worker); worker->attachment_cnt = saved_cnt; mutex_unlock(&worker->mutex); return attach.ret; } /* Caller should have device mutex */ bool vhost_dev_has_owner(struct vhost_dev *dev) { return dev->mm; } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); static void vhost_attach_mm(struct vhost_dev *dev) { /* No owner, become one */ if (dev->use_worker) { dev->mm = get_task_mm(current); } else { /* vDPA device does not use worker thread, so there's * no need to hold the address space for mm. This helps * to avoid deadlock in the case of mmap() which may * hold the refcnt of the file and depends on release * method to remove vma. */ dev->mm = current->mm; mmgrab(dev->mm); } } static void vhost_detach_mm(struct vhost_dev *dev) { if (!dev->mm) return; if (dev->use_worker) mmput(dev->mm); else mmdrop(dev->mm); dev->mm = NULL; } static void vhost_worker_destroy(struct vhost_dev *dev, struct vhost_worker *worker) { if (!worker) return; WARN_ON(!llist_empty(&worker->work_list)); xa_erase(&dev->worker_xa, worker->id); worker->ops->stop(worker); kfree(worker); } static void vhost_workers_free(struct vhost_dev *dev) { struct vhost_worker *worker; unsigned long i; if (!dev->use_worker) return; for (i = 0; i < dev->nvqs; i++) rcu_assign_pointer(dev->vqs[i]->worker, NULL); /* * Free the default worker we created and cleanup workers userspace * created but couldn't clean up (it forgot or crashed). */ xa_for_each(&dev->worker_xa, i, worker) vhost_worker_destroy(dev, worker); xa_destroy(&dev->worker_xa); } static void vhost_task_wakeup(struct vhost_worker *worker) { return vhost_task_wake(worker->vtsk); } static void vhost_kthread_wakeup(struct vhost_worker *worker) { wake_up_process(worker->kthread_task); } static void vhost_task_do_stop(struct vhost_worker *worker) { return vhost_task_stop(worker->vtsk); } static void vhost_kthread_do_stop(struct vhost_worker *worker) { kthread_stop(worker->kthread_task); } static int vhost_task_worker_create(struct vhost_worker *worker, struct vhost_dev *dev, const char *name) { struct vhost_task *vtsk; u32 id; int ret; vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); if (IS_ERR(vtsk)) return PTR_ERR(vtsk); worker->vtsk = vtsk; vhost_task_start(vtsk); ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); if (ret < 0) { vhost_task_do_stop(worker); return ret; } worker->id = id; return 0; } static int vhost_kthread_worker_create(struct vhost_worker *worker, struct vhost_dev *dev, const char *name) { struct task_struct *task; u32 id; int ret; task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name); if (IS_ERR(task)) return PTR_ERR(task); worker->kthread_task = task; wake_up_process(task); ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); if (ret < 0) goto stop_worker; ret = vhost_attach_task_to_cgroups(worker); if (ret) goto stop_worker; worker->id = id; return 0; stop_worker: vhost_kthread_do_stop(worker); return ret; } static const struct vhost_worker_ops kthread_ops = { .create = vhost_kthread_worker_create, .stop = vhost_kthread_do_stop, .wakeup = vhost_kthread_wakeup, }; static const struct vhost_worker_ops vhost_task_ops = { .create = vhost_task_worker_create, .stop = vhost_task_do_stop, .wakeup = vhost_task_wakeup, }; static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { struct vhost_worker *worker; char name[TASK_COMM_LEN]; int ret; const struct vhost_worker_ops *ops = dev->fork_owner ? &vhost_task_ops : &kthread_ops; worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); if (!worker) return NULL; worker->dev = dev; worker->ops = ops; snprintf(name, sizeof(name), "vhost-%d", current->pid); mutex_init(&worker->mutex); init_llist_head(&worker->work_list); worker->kcov_handle = kcov_common_handle(); ret = ops->create(worker, dev, name); if (ret < 0) goto free_worker; return worker; free_worker: kfree(worker); return NULL; } /* Caller must have device mutex */ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, struct vhost_worker *worker) { struct vhost_worker *old_worker; mutex_lock(&worker->mutex); if (worker->killed) { mutex_unlock(&worker->mutex); return; } mutex_lock(&vq->mutex); old_worker = rcu_dereference_check(vq->worker, lockdep_is_held(&vq->mutex)); rcu_assign_pointer(vq->worker, worker); worker->attachment_cnt++; if (!old_worker) { mutex_unlock(&vq->mutex); mutex_unlock(&worker->mutex); return; } mutex_unlock(&vq->mutex); mutex_unlock(&worker->mutex); /* * Take the worker mutex to make sure we see the work queued from * device wide flushes which doesn't use RCU for execution. */ mutex_lock(&old_worker->mutex); if (old_worker->killed) { mutex_unlock(&old_worker->mutex); return; } /* * We don't want to call synchronize_rcu for every vq during setup * because it will slow down VM startup. If we haven't done * VHOST_SET_VRING_KICK and not done the driver specific * SET_ENDPOINT/RUNNING then we can skip the sync since there will * not be any works queued for scsi and net. */ mutex_lock(&vq->mutex); if (!vhost_vq_get_backend(vq) && !vq->kick) { mutex_unlock(&vq->mutex); old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); /* * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. * Warn if it adds support for multiple workers but forgets to * handle the early queueing case. */ WARN_ON(!old_worker->attachment_cnt && !llist_empty(&old_worker->work_list)); return; } mutex_unlock(&vq->mutex); /* Make sure new vq queue/flush/poll calls see the new worker */ synchronize_rcu(); /* Make sure whatever was queued gets run */ __vhost_worker_flush(old_worker); old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); } /* Caller must have device mutex */ static int vhost_vq_attach_worker(struct vhost_virtqueue *vq, struct vhost_vring_worker *info) { unsigned long index = info->worker_id; struct vhost_dev *dev = vq->dev; struct vhost_worker *worker; if (!dev->use_worker) return -EINVAL; worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); if (!worker || worker->id != info->worker_id) return -ENODEV; __vhost_vq_attach_worker(vq, worker); return 0; } /* Caller must have device mutex */ static int vhost_new_worker(struct vhost_dev *dev, struct vhost_worker_state *info) { struct vhost_worker *worker; worker = vhost_worker_create(dev); if (!worker) return -ENOMEM; info->worker_id = worker->id; return 0; } /* Caller must have device mutex */ static int vhost_free_worker(struct vhost_dev *dev, struct vhost_worker_state *info) { unsigned long index = info->worker_id; struct vhost_worker *worker; worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); if (!worker || worker->id != info->worker_id) return -ENODEV; mutex_lock(&worker->mutex); if (worker->attachment_cnt || worker->killed) { mutex_unlock(&worker->mutex); return -EBUSY; } /* * A flush might have raced and snuck in before attachment_cnt was set * to zero. Make sure flushes are flushed from the queue before * freeing. */ __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); vhost_worker_destroy(dev, worker); return 0; } static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp, struct vhost_virtqueue **vq, u32 *id) { u32 __user *idxp = argp; u32 idx; long r; r = get_user(idx, idxp); if (r < 0) return r; if (idx >= dev->nvqs) return -ENOBUFS; idx = array_index_nospec(idx, dev->nvqs); *vq = dev->vqs[idx]; *id = idx; return 0; } /* Caller must have device mutex */ long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, void __user *argp) { struct vhost_vring_worker ring_worker; struct vhost_worker_state state; struct vhost_worker *worker; struct vhost_virtqueue *vq; long ret; u32 idx; if (!dev->use_worker) return -EINVAL; if (!vhost_dev_has_owner(dev)) return -EINVAL; ret = vhost_dev_check_owner(dev); if (ret) return ret; switch (ioctl) { /* dev worker ioctls */ case VHOST_NEW_WORKER: /* * vhost_tasks will account for worker threads under the parent's * NPROC value but kthreads do not. To avoid userspace overflowing * the system with worker threads fork_owner must be true. */ if (!dev->fork_owner) return -EFAULT; ret = vhost_new_worker(dev, &state); if (!ret && copy_to_user(argp, &state, sizeof(state))) ret = -EFAULT; return ret; case VHOST_FREE_WORKER: if (copy_from_user(&state, argp, sizeof(state))) return -EFAULT; return vhost_free_worker(dev, &state); /* vring worker ioctls */ case VHOST_ATTACH_VRING_WORKER: case VHOST_GET_VRING_WORKER: break; default: return -ENOIOCTLCMD; } ret = vhost_get_vq_from_user(dev, argp, &vq, &idx); if (ret) return ret; switch (ioctl) { case VHOST_ATTACH_VRING_WORKER: if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) { ret = -EFAULT; break; } ret = vhost_vq_attach_worker(vq, &ring_worker); break; case VHOST_GET_VRING_WORKER: worker = rcu_dereference_check(vq->worker, lockdep_is_held(&dev->mutex)); if (!worker) { ret = -EINVAL; break; } ring_worker.index = idx; ring_worker.worker_id = worker->id; if (copy_to_user(argp, &ring_worker, sizeof(ring_worker))) ret = -EFAULT; break; default: ret = -ENOIOCTLCMD; break; } return ret; } EXPORT_SYMBOL_GPL(vhost_worker_ioctl); /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { struct vhost_worker *worker; int err, i; /* Is there an owner already? */ if (vhost_dev_has_owner(dev)) { err = -EBUSY; goto err_mm; } vhost_attach_mm(dev); err = vhost_dev_alloc_iovecs(dev); if (err) goto err_iovecs; if (dev->use_worker) { /* * This should be done last, because vsock can queue work * before VHOST_SET_OWNER so it simplifies the failure path * below since we don't have to worry about vsock queueing * while we free the worker. */ worker = vhost_worker_create(dev); if (!worker) { err = -ENOMEM; goto err_worker; } for (i = 0; i < dev->nvqs; i++) __vhost_vq_attach_worker(dev->vqs[i], worker); } return 0; err_worker: vhost_dev_free_iovecs(dev); err_iovecs: vhost_detach_mm(dev); err_mm: return err; } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); static struct vhost_iotlb *iotlb_alloc(void) { return vhost_iotlb_alloc(max_iotlb_entries, VHOST_IOTLB_FLAG_RETIRE); } struct vhost_iotlb *vhost_dev_reset_owner_prepare(void) { return iotlb_alloc(); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) { int i; vhost_dev_cleanup(dev); dev->fork_owner = fork_from_owner_default; dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); void vhost_dev_stop(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) vhost_poll_stop(&dev->vqs[i]->poll); } vhost_dev_flush(dev); } EXPORT_SYMBOL_GPL(vhost_dev_stop); void vhost_clear_msg(struct vhost_dev *dev) { struct vhost_msg_node *node, *n; spin_lock(&dev->iotlb_lock); list_for_each_entry_safe(node, n, &dev->read_list, node) { list_del(&node->node); kfree(node); } list_for_each_entry_safe(node, n, &dev->pending_list, node) { list_del(&node->node); kfree(node); } spin_unlock(&dev->iotlb_lock); } EXPORT_SYMBOL_GPL(vhost_clear_msg); void vhost_dev_cleanup(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->error_ctx) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) fput(dev->vqs[i]->kick); if (dev->vqs[i]->call_ctx.ctx) eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); if (dev->log_ctx) eventfd_ctx_put(dev->log_ctx); dev->log_ctx = NULL; /* No one will access memory at this point */ vhost_iotlb_free(dev->umem); dev->umem = NULL; vhost_iotlb_free(dev->iotlb); dev->iotlb = NULL; vhost_clear_msg(dev); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); vhost_workers_free(dev); vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) { u64 a = addr / VHOST_PAGE_SIZE / 8; /* Make sure 64 bit math will not overflow. */ if (a > ULONG_MAX - (unsigned long)log_base || a + (unsigned long)log_base > ULONG_MAX) return false; return access_ok(log_base + a, (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } /* Make sure 64 bit math will not overflow. */ static bool vhost_overflow(u64 uaddr, u64 size) { if (uaddr > ULONG_MAX || size > ULONG_MAX) return true; if (!size) return false; return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, int log_all) { struct vhost_iotlb_map *map; if (!umem) return false; list_for_each_entry(map, &umem->list, link) { unsigned long a = map->addr; if (vhost_overflow(map->addr, map->size)) return false; if (!access_ok((void __user *)a, map->size)) return false; else if (log_all && !log_access_ok(log_base, map->start, map->size)) return false; } return true; } static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, u64 addr, unsigned int size, int type) { const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; if (!map) return NULL; return (void __user *)(uintptr_t)(map->addr + addr - map->start); } /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, int log_all) { int i; for (i = 0; i < d->nvqs; ++i) { bool ok; bool log; mutex_lock(&d->vqs[i]->mutex); log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) ok = vq_memory_access_ok(d->vqs[i]->log_base, umem, log); else ok = true; mutex_unlock(&d->vqs[i]->mutex); if (!ok) return false; } return true; } static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access); static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, const void *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_to_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that all vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ struct iov_iter t; void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)to, size, VHOST_ADDR_USED); if (uaddr) return __copy_to_user(uaddr, from, size); ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); if (ret < 0) goto out; iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size); ret = copy_to_iter(from, size, &t); if (ret == size) ret = 0; } out: return ret; } static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, void __user *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_from_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)from, size, VHOST_ADDR_DESC); struct iov_iter f; if (uaddr) return __copy_from_user(to, uaddr, size); ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", from, (unsigned long long) size); goto out; } iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size); ret = copy_from_iter(to, size, &f); if (ret == size) ret = 0; } out: return ret; } static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, void __user *addr, unsigned int size, int type) { int ret; ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { vq_err(vq, "Non atomic userspace memory access: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } return vq->iotlb_iov[0].iov_base; } /* This function should be called after iotlb * prefetch, which means we're sure that vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, void __user *addr, unsigned int size, int type) { void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)addr, size, type); if (uaddr) return uaddr; return __vhost_get_user_slow(vq, addr, size, type); } #define vhost_put_user(vq, x, ptr) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ __typeof__(ptr) to = \ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ sizeof(*ptr), VHOST_ADDR_USED); \ if (to != NULL) \ ret = __put_user(x, to); \ else \ ret = -EFAULT; \ } \ ret; \ }) static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)); } static inline int vhost_put_used(struct vhost_virtqueue *vq, struct vring_used_elem *head, int idx, int count) { return vhost_copy_to_user(vq, vq->used->ring + idx, head, count * sizeof(*head)); } static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags); } static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx); } #define vhost_get_user(vq, x, ptr, type) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __get_user(x, ptr); \ } else { \ __typeof__(ptr) from = \ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ sizeof(*ptr), \ type); \ if (from != NULL) \ ret = __get_user(x, from); \ else \ ret = -EFAULT; \ } \ ret; \ }) #define vhost_get_avail(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) #define vhost_get_used(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) mutex_unlock(&d->vqs[i]->mutex); } static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq) { __virtio16 idx; int r; r = vhost_get_avail(vq, idx, &vq->avail->idx); if (unlikely(r < 0)) { vq_err(vq, "Failed to access available index at %p (%d)\n", &vq->avail->idx, r); return r; } /* Check it isn't doing very strange thing with available indexes */ vq->avail_idx = vhost16_to_cpu(vq, idx); if (unlikely((u16)(vq->avail_idx - vq->last_avail_idx) > vq->num)) { vq_err(vq, "Invalid available index change from %u to %u", vq->last_avail_idx, vq->avail_idx); return -EINVAL; } /* We're done if there is nothing new */ if (vq->avail_idx == vq->last_avail_idx) return 0; /* * We updated vq->avail_idx so we need a memory barrier between * the index read above and the caller reading avail ring entries. */ smp_rmb(); return 1; } static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, __virtio16 *head, int idx) { return vhost_get_avail(vq, *head, &vq->avail->ring[idx & (vq->num - 1)]); } static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, __virtio16 *flags) { return vhost_get_avail(vq, *flags, &vq->avail->flags); } static inline int vhost_get_used_event(struct vhost_virtqueue *vq, __virtio16 *event) { return vhost_get_avail(vq, *event, vhost_used_event(vq)); } static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { return vhost_get_used(vq, *idx, &vq->used->idx); } static inline int vhost_get_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, int idx) { return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); } static void vhost_iotlb_notify_vq(struct vhost_dev *d, struct vhost_iotlb_msg *msg) { struct vhost_msg_node *node, *n; spin_lock(&d->iotlb_lock); list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); kfree(node); } } spin_unlock(&d->iotlb_lock); } static bool umem_access_ok(u64 uaddr, u64 size, int access) { unsigned long a = uaddr; /* Make sure 64 bit math will not overflow. */ if (vhost_overflow(uaddr, size)) return false; if ((access & VHOST_ACCESS_RO) && !access_ok((void __user *)a, size)) return false; if ((access & VHOST_ACCESS_WO) && !access_ok((void __user *)a, size)) return false; return true; } static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { int ret = 0; if (asid != 0) return -EINVAL; mutex_lock(&dev->mutex); vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: if (!dev->iotlb) { ret = -EFAULT; break; } if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) { ret = -EFAULT; break; } vhost_vq_meta_reset(dev); if (vhost_iotlb_add_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1, msg->uaddr, msg->perm)) { ret = -ENOMEM; break; } vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: if (!dev->iotlb) { ret = -EFAULT; break; } vhost_vq_meta_reset(dev); vhost_iotlb_del_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); break; default: ret = -EINVAL; break; } vhost_dev_unlock_vqs(dev); mutex_unlock(&dev->mutex); return ret; } ssize_t vhost_chr_write_iter(struct vhost_dev *dev, struct iov_iter *from) { struct vhost_iotlb_msg msg; size_t offset; int type, ret; u32 asid = 0; ret = copy_from_iter(&type, sizeof(type), from); if (ret != sizeof(type)) { ret = -EINVAL; goto done; } switch (type) { case VHOST_IOTLB_MSG: /* There maybe a hole after type for V1 message type, * so skip it here. */ offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); break; case VHOST_IOTLB_MSG_V2: if (vhost_backend_has_feature(dev->vqs[0], VHOST_BACKEND_F_IOTLB_ASID)) { ret = copy_from_iter(&asid, sizeof(asid), from); if (ret != sizeof(asid)) { ret = -EINVAL; goto done; } offset = 0; } else offset = sizeof(__u32); break; default: ret = -EINVAL; goto done; } iov_iter_advance(from, offset); ret = copy_from_iter(&msg, sizeof(msg), from); if (ret != sizeof(msg)) { ret = -EINVAL; goto done; } if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) { ret = -EINVAL; goto done; } if (dev->msg_handler) ret = dev->msg_handler(dev, asid, &msg); else ret = vhost_process_iotlb_msg(dev, asid, &msg); if (ret) { ret = -EFAULT; goto done; } ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) : sizeof(struct vhost_msg_v2); done: return ret; } EXPORT_SYMBOL(vhost_chr_write_iter); __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, poll_table *wait) { __poll_t mask = 0; poll_wait(file, &dev->wait, wait); if (!list_empty(&dev->read_list)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } EXPORT_SYMBOL(vhost_chr_poll); ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, int noblock) { DEFINE_WAIT(wait); struct vhost_msg_node *node; ssize_t ret = 0; unsigned size = sizeof(struct vhost_msg); if (iov_iter_count(to) < size) return 0; while (1) { if (!noblock) prepare_to_wait(&dev->wait, &wait, TASK_INTERRUPTIBLE); node = vhost_dequeue_msg(dev, &dev->read_list); if (node) break; if (noblock) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!dev->iotlb) { ret = -EBADFD; break; } schedule(); } if (!noblock) finish_wait(&dev->wait, &wait); if (node) { struct vhost_iotlb_msg *msg; void *start = &node->msg; switch (node->msg.type) { case VHOST_IOTLB_MSG: size = sizeof(node->msg); msg = &node->msg.iotlb; break; case VHOST_IOTLB_MSG_V2: size = sizeof(node->msg_v2); msg = &node->msg_v2.iotlb; break; default: BUG(); break; } ret = copy_to_iter(start, size, to); if (ret != size || msg->type != VHOST_IOTLB_MISS) { kfree(node); return ret; } vhost_enqueue_msg(dev, &dev->pending_list, node); } return ret; } EXPORT_SYMBOL_GPL(vhost_chr_read_iter); static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) { struct vhost_dev *dev = vq->dev; struct vhost_msg_node *node; struct vhost_iotlb_msg *msg; bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2); node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG); if (!node) return -ENOMEM; if (v2) { node->msg_v2.type = VHOST_IOTLB_MSG_V2; msg = &node->msg_v2.iotlb; } else { msg = &node->msg.iotlb; } msg->type = VHOST_IOTLB_MISS; msg->iova = iova; msg->perm = access; vhost_enqueue_msg(dev, &dev->read_list, node); return 0; } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, vring_desc_t __user *desc, vring_avail_t __user *avail, vring_used_t __user *used) { /* If an IOTLB device is present, the vring addresses are * GIOVAs. Access validation occurs at prefetch time. */ if (vq->iotlb) return true; return access_ok(desc, vhost_get_desc_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(used, vhost_get_used_size(vq, num)); } static void vhost_vq_meta_update(struct vhost_virtqueue *vq, const struct vhost_iotlb_map *map, int type) { int access = (type == VHOST_ADDR_USED) ? VHOST_ACCESS_WO : VHOST_ACCESS_RO; if (likely(map->perm & access)) vq->meta_iotlb[type] = map; } static bool iotlb_access_ok(struct vhost_virtqueue *vq, int access, u64 addr, u64 len, int type) { const struct vhost_iotlb_map *map; struct vhost_iotlb *umem = vq->iotlb; u64 s = 0, size, orig_addr = addr, last = addr + len - 1; if (vhost_vq_meta_fetch(vq, addr, len, type)) return true; while (len > s) { map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { vhost_iotlb_miss(vq, addr, access); return false; } else if (!(map->perm & access)) { /* Report the possible access violation by * request another translation from userspace. */ return false; } size = map->size - addr + map->start; if (orig_addr == addr && size >= len) vhost_vq_meta_update(vq, map, type); s += size; addr += size; } return true; } int vq_meta_prefetch(struct vhost_virtqueue *vq) { unsigned int num = vq->num; if (!vq->iotlb) return 1; return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, vhost_get_avail_size(vq, num), VHOST_ADDR_AVAIL) && iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, vhost_get_used_size(vq, num), VHOST_ADDR_USED); } EXPORT_SYMBOL_GPL(vq_meta_prefetch); /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ bool vhost_log_access_ok(struct vhost_dev *dev) { return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, void __user *log_base, bool log_used, u64 log_addr) { /* If an IOTLB device is present, log_addr is a GIOVA that * will never be logged by log_used(). */ if (vq->iotlb) return true; return !log_used || log_access_ok(log_base, log_addr, vhost_get_used_size(vq, vq->num)); } /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ static bool vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); } /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) { if (!vq_log_access_ok(vq, vq->log_base)) return false; return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem; struct vhost_memory_region *region; struct vhost_iotlb *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; if (copy_from_user(&mem, m, size)) return -EFAULT; if (mem.padding) return -EOPNOTSUPP; if (mem.nregions > max_mem_regions) return -E2BIG; newmem = kvzalloc(struct_size(newmem, regions, mem.nregions), GFP_KERNEL); if (!newmem) return -ENOMEM; memcpy(newmem, &mem, size); if (copy_from_user(newmem->regions, m->regions, flex_array_size(newmem, regions, mem.nregions))) { kvfree(newmem); return -EFAULT; } newumem = iotlb_alloc(); if (!newumem) { kvfree(newmem); return -ENOMEM; } for (region = newmem->regions; region < newmem->regions + mem.nregions; region++) { if (vhost_iotlb_add_range(newumem, region->guest_phys_addr, region->guest_phys_addr + region->memory_size - 1, region->userspace_addr, VHOST_MAP_RW)) goto err; } if (!memory_access_ok(d, newumem, 0)) goto err; oldumem = d->umem; d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } kvfree(newmem); vhost_iotlb_free(oldumem); return 0; err: vhost_iotlb_free(newumem); kvfree(newmem); return -EFAULT; } static long vhost_vring_set_num(struct vhost_dev *d, struct vhost_virtqueue *vq, void __user *argp) { struct vhost_vring_state s; /* Resizing ring with an active backend? * You don't want to do that. */ if (vq->private_data) return -EBUSY; if (copy_from_user(&s, argp, sizeof s)) return -EFAULT; if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) return -EINVAL; vq->num = s.num; return 0; } static long vhost_vring_set_addr(struct vhost_dev *d, struct vhost_virtqueue *vq, void __user *argp) { struct vhost_vring_addr a; if (copy_from_user(&a, argp, sizeof a)) return -EFAULT; if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) return -EOPNOTSUPP; /* For 32bit, verify that the top 32bits of the user data are set to zero. */ if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || (u64)(unsigned long)a.used_user_addr != a.used_user_addr || (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) return -EFAULT; /* Make sure it's safe to cast pointers to vring types. */ BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) return -EINVAL; /* We only verify access here if backend is configured. * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { if (!vq_access_ok(vq, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) return -EINVAL; /* Also validate log access for used ring if enabled. */ if (!vq_log_used_access_ok(vq, vq->log_base, a.flags & (0x1 << VHOST_VRING_F_LOG), a.log_guest_addr)) return -EINVAL; } vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); vq->desc = (void __user *)(unsigned long)a.desc_user_addr; vq->avail = (void __user *)(unsigned long)a.avail_user_addr; vq->log_addr = a.log_guest_addr; vq->used = (void __user *)(unsigned long)a.used_user_addr; return 0; } static long vhost_vring_set_num_addr(struct vhost_dev *d, struct vhost_virtqueue *vq, unsigned int ioctl, void __user *argp) { long r; mutex_lock(&vq->mutex); switch (ioctl) { case VHOST_SET_VRING_NUM: r = vhost_vring_set_num(d, vq, argp); break; case VHOST_SET_VRING_ADDR: r = vhost_vring_set_addr(d, vq, argp); break; default: BUG(); } mutex_unlock(&vq->mutex); return r; } long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; bool pollstart = false, pollstop = false; struct eventfd_ctx *ctx = NULL; struct vhost_virtqueue *vq; struct vhost_vring_state s; struct vhost_vring_file f; u32 idx; long r; r = vhost_get_vq_from_user(d, argp, &vq, &idx); if (r < 0) return r; if (ioctl == VHOST_SET_VRING_NUM || ioctl == VHOST_SET_VRING_ADDR) { return vhost_vring_set_num_addr(d, vq, ioctl, argp); } mutex_lock(&vq->mutex); switch (ioctl) { case VHOST_SET_VRING_BASE: /* Moving base with an active backend? * You don't want to do that. */ if (vq->private_data) { r = -EBUSY; break; } if (copy_from_user(&s, argp, sizeof s)) { r = -EFAULT; break; } if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { vq->next_avail_head = vq->last_avail_idx = s.num & 0xffff; vq->last_used_idx = (s.num >> 16) & 0xffff; } else { if (s.num > 0xffff) { r = -EINVAL; break; } vq->next_avail_head = vq->last_avail_idx = s.num; } /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; break; case VHOST_GET_VRING_BASE: s.index = idx; if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); else s.num = vq->last_avail_idx; if (copy_to_user(argp, &s, sizeof s)) r = -EFAULT; break; case VHOST_SET_VRING_KICK: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; } if (eventfp != vq->kick) { pollstop = (filep = vq->kick) != NULL; pollstart = (vq->kick = eventfp) != NULL; } else filep = eventfp; break; case VHOST_SET_VRING_CALL: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, vq->call_ctx.ctx); break; case VHOST_SET_VRING_ERR: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, vq->error_ctx); break; case VHOST_SET_VRING_ENDIAN: r = vhost_set_vring_endian(vq, argp); break; case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: if (copy_from_user(&s, argp, sizeof(s))) { r = -EFAULT; break; } vq->busyloop_timeout = s.num; break; case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: s.index = idx; s.num = vq->busyloop_timeout; if (copy_to_user(argp, &s, sizeof(s))) r = -EFAULT; break; default: r = -ENOIOCTLCMD; } if (pollstop && vq->handle_kick) vhost_poll_stop(&vq->poll); if (!IS_ERR_OR_NULL(ctx)) eventfd_ctx_put(ctx); if (filep) fput(filep); if (pollstart && vq->handle_kick) r = vhost_poll_start(&vq->poll, vq->kick); mutex_unlock(&vq->mutex); if (pollstop && vq->handle_kick) vhost_dev_flush(vq->poll.dev); return r; } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); int vhost_init_device_iotlb(struct vhost_dev *d) { struct vhost_iotlb *niotlb, *oiotlb; int i; niotlb = iotlb_alloc(); if (!niotlb) return -ENOMEM; oiotlb = d->iotlb; d->iotlb = niotlb; for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq = d->vqs[i]; mutex_lock(&vq->mutex); vq->iotlb = niotlb; __vhost_vq_meta_reset(vq); mutex_unlock(&vq->mutex); } vhost_iotlb_free(oiotlb); return 0; } EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct eventfd_ctx *ctx; u64 p; long r; int i, fd; /* If you are not the owner, you can become one */ if (ioctl == VHOST_SET_OWNER) { r = vhost_dev_set_owner(d); goto done; } #ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL if (ioctl == VHOST_SET_FORK_FROM_OWNER) { /* Only allow modification before owner is set */ if (vhost_dev_has_owner(d)) { r = -EBUSY; goto done; } u8 fork_owner_val; if (get_user(fork_owner_val, (u8 __user *)argp)) { r = -EFAULT; goto done; } if (fork_owner_val != VHOST_FORK_OWNER_TASK && fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { r = -EINVAL; goto done; } d->fork_owner = !!fork_owner_val; r = 0; goto done; } if (ioctl == VHOST_GET_FORK_FROM_OWNER) { u8 fork_owner_val = d->fork_owner; if (fork_owner_val != VHOST_FORK_OWNER_TASK && fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { r = -EINVAL; goto done; } if (put_user(fork_owner_val, (u8 __user *)argp)) { r = -EFAULT; goto done; } r = 0; goto done; } #endif /* You must be the owner to do anything else */ r = vhost_dev_check_owner(d); if (r) goto done; switch (ioctl) { case VHOST_SET_MEM_TABLE: r = vhost_set_memory(d, argp); break; case VHOST_SET_LOG_BASE: if (copy_from_user(&p, argp, sizeof p)) { r = -EFAULT; break; } if ((u64)(unsigned long)p != p) { r = -EFAULT; break; } for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq; void __user *base = (void __user *)(unsigned long)p; vq = d->vqs[i]; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ if (vq->private_data && !vq_log_access_ok(vq, base)) r = -EFAULT; else vq->log_base = base; mutex_unlock(&vq->mutex); } break; case VHOST_SET_LOG_FD: r = get_user(fd, (int __user *)argp); if (r < 0) break; ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, d->log_ctx); for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); d->vqs[i]->log_ctx = d->log_ctx; mutex_unlock(&d->vqs[i]->mutex); } if (ctx) eventfd_ctx_put(ctx); break; default: r = -ENOIOCTLCMD; break; } done: return r; } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst. */ static int set_bit_to_user(int nr, void __user *addr) { unsigned long log = (unsigned long)addr; struct page *page; void *base; int bit = nr + (log % PAGE_SIZE) * 8; int r; r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); if (r < 0) return r; BUG_ON(r != 1); base = kmap_atomic(page); set_bit(bit, base); kunmap_atomic(base); unpin_user_pages_dirty_lock(&page, 1, true); return 0; } static int log_write(void __user *log_base, u64 write_address, u64 write_length) { u64 write_page = write_address / VHOST_PAGE_SIZE; int r; if (!write_length) return 0; write_length += write_address % VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; u64 log = base + write_page / 8; int bit = write_page % 8; if ((u64)(unsigned long)log != log) return -EFAULT; r = set_bit_to_user(bit, (void __user *)(unsigned long)log); if (r < 0) return r; if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; write_page += 1; } return r; } static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) { struct vhost_iotlb *umem = vq->umem; struct vhost_iotlb_map *u; u64 start, end, l, min; int r; bool hit = false; while (len) { min = len; /* More than one GPAs can be mapped into a single HVA. So * iterate all possible umems here to be safe. */ list_for_each_entry(u, &umem->list, link) { if (u->addr > hva - 1 + len || u->addr - 1 + u->size < hva) continue; start = max(u->addr, hva); end = min(u->addr - 1 + u->size, hva - 1 + len); l = end - start + 1; r = log_write(vq->log_base, u->start + start - u->addr, l); if (r < 0) return r; hit = true; min = min(l, min); } if (!hit) return -EFAULT; len -= min; hva += min; } return 0; } static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) { struct iovec *iov = vq->log_iov; int i, ret; if (!vq->iotlb) return log_write(vq->log_base, vq->log_addr + used_offset, len); ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, len, iov, 64, VHOST_ACCESS_WO); if (ret < 0) return ret; for (i = 0; i < ret; i++) { ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (ret) return ret; } return 0; } /* * vhost_log_write() - Log in dirty page bitmap * @vq: vhost virtqueue. * @log: Array of dirty memory in GPA. * @log_num: Size of vhost_log arrary. * @len: The total length of memory buffer to log in the dirty bitmap. * Some drivers may only partially use pages shared via the last * vring descriptor (i.e. vhost-net RX buffer). * Use (len == U64_MAX) to indicate the driver would log all * pages of vring descriptors. * @iov: Array of dirty memory in HVA. * @count: Size of iovec array. */ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); if (vq->iotlb) { for (i = 0; i < count; i++) { r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (r < 0) return r; } return 0; } for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); if (r < 0) return r; if (len != U64_MAX) len -= l; } if (vq->log_ctx) eventfd_signal(vq->log_ctx); return 0; } EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; if (vhost_put_used_flags(vq)) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ smp_wmb(); /* Log used flag write. */ used = &vq->used->flags; log_used(vq, (used - (void __user *)vq->used), sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } static int vhost_update_avail_event(struct vhost_virtqueue *vq) { if (vhost_put_avail_event(vq)) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; /* Make sure the event is seen before log. */ smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); log_used(vq, (used - (void __user *)vq->used), sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; bool is_le = vq->is_le; if (!vq->private_data) return 0; vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) goto err; vq->signalled_used_valid = false; if (!vq->iotlb && !access_ok(&vq->used->idx, sizeof vq->used->idx)) { r = -EFAULT; goto err; } r = vhost_get_used_idx(vq, &last_used_idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); goto err; } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; err: vq->is_le = is_le; return r; } EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access) { const struct vhost_iotlb_map *map; struct vhost_dev *dev = vq->dev; struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0, last = addr + len - 1; int ret = 0; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { if (umem != dev->iotlb) { ret = -EFAULT; break; } ret = -EAGAIN; break; } else if (!(map->perm & access)) { ret = -EPERM; break; } _iov = iov + ret; size = map->size - addr + map->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) (map->addr + addr - map->start); s += size; addr += size; ++ret; } if (ret == -EAGAIN) vhost_iotlb_miss(vq, addr, access); return ret; } /* Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, * or -1U if we're at the end. */ static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) return -1U; /* Check they're not leading us off end of descriptors. */ next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); return next; } static int get_indirect(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num, struct vring_desc *indirect) { struct vring_desc desc; unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { vq_err(vq, "Invalid length in indirect descriptor: " "len 0x%llx not multiple of 0x%zx\n", (unsigned long long)len, sizeof desc); return -EINVAL; } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len); count = len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count > USHRT_MAX + 1)) { vq_err(vq, "Indirect buffer length too big: %d\n", indirect->len); return -E2BIG; } do { unsigned iov_count = *in_num + *out_num; if (unlikely(++found > count)) { vq_err(vq, "Loop detected: last one at %u " "indirect size %u\n", i, count); return -EINVAL; } if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) access = VHOST_ACCESS_WO; else access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count, access); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d indirect idx %d\n", ret, i); return ret; } /* If this is an input descriptor, increment that count. */ if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ if (unlikely(*in_num)) { vq_err(vq, "Indirect descriptor " "has out after in: idx %d\n", i); return -EINVAL; } *out_num += ret; } } while ((i = next_desc(vq, &desc)) != -1); return 0; } /* This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * * This function returns the descriptor number found, or vq->num (which is * never a valid descriptor number) if none was found. A negative code is * returned on error. */ int vhost_get_vq_desc(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num) { bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); struct vring_desc desc; unsigned int i, head, found = 0; u16 last_avail_idx = vq->last_avail_idx; __virtio16 ring_head; int ret, access, c = 0; if (vq->avail_idx == vq->last_avail_idx) { ret = vhost_get_avail_idx(vq); if (unlikely(ret < 0)) return ret; if (!ret) return vq->num; } if (in_order) head = vq->next_avail_head & (vq->num - 1); else { /* Grab the next descriptor number they're * advertising, and increment the index we've seen. */ if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); return -EFAULT; } head = vhost16_to_cpu(vq, ring_head); } /* If their number is silly, that's an error. */ if (unlikely(head >= vq->num)) { vq_err(vq, "Guest says index %u > %u is available", head, vq->num); return -EINVAL; } /* When we start there are none of either input nor output. */ *out_num = *in_num = 0; if (unlikely(log)) *log_num = 0; i = head; do { unsigned iov_count = *in_num + *out_num; if (unlikely(i >= vq->num)) { vq_err(vq, "Desc index is %u > %u, head = %u", i, vq->num, head); return -EINVAL; } if (unlikely(++found > vq->num)) { vq_err(vq, "Loop detected: last one at %u " "vq size %u head %u\n", i, vq->num, head); return -EINVAL; } ret = vhost_get_desc(vq, &desc, i); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); return -EFAULT; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) { ret = get_indirect(vq, iov, iov_size, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Failure detected " "in indirect descriptor at idx %d\n", i); return ret; } ++c; continue; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) access = VHOST_ACCESS_WO; else access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count, access); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d descriptor idx %d\n", ret, i); return ret; } if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ if (unlikely(*in_num)) { vq_err(vq, "Descriptor has out after in: " "idx %d\n", i); return -EINVAL; } *out_num += ret; } ++c; } while ((i = next_desc(vq, &desc)) != -1); /* On success, increment avail index. */ vq->last_avail_idx++; vq->next_avail_head += c; /* Assume notifications from guest are disabled at this point, * if they aren't we would need to update avail_event index. */ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } EXPORT_SYMBOL_GPL(vhost_get_vq_desc); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { vq->last_avail_idx -= n; } EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); /* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. */ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { struct vring_used_elem heads = { cpu_to_vhost32(vq, head), cpu_to_vhost32(vq, len) }; u16 nheads = 1; return vhost_add_used_n(vq, &heads, &nheads, 1); } EXPORT_SYMBOL_GPL(vhost_add_used); static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { vring_used_elem_t __user *used; u16 old, new; int start; start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (vhost_put_used(vq, heads, start, count)) { vq_err(vq, "Failed to write used"); return -EFAULT; } if (unlikely(vq->log_used)) { /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ log_used(vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); /* If the driver never bothers to signal in a very long while, * used index might wrap around. If that happens, invalidate * signalled_used index we stored. TODO: make sure driver * signals at least once in 2^16 and remove this. */ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) vq->signalled_used_valid = false; return 0; } static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { int start, n, r; start = vq->last_used_idx & (vq->num - 1); n = vq->num - start; if (n < count) { r = __vhost_add_used_n(vq, heads, n); if (r < 0) return r; heads += n; count -= n; } return __vhost_add_used_n(vq, heads, count); } static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq, struct vring_used_elem *heads, const u16 *nheads, unsigned count) { vring_used_elem_t __user *used; u16 old, new = vq->last_used_idx; int start, i; if (!nheads) return -EINVAL; start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; for (i = 0; i < count; i++) { if (vhost_put_used(vq, &heads[i], start, 1)) { vq_err(vq, "Failed to write used"); return -EFAULT; } start += nheads[i]; new += nheads[i]; if (start >= vq->num) start -= vq->num; } if (unlikely(vq->log_used)) { /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ log_used(vq, ((void __user *)used - (void __user *)vq->used), (vq->num - start) * sizeof *used); if (start + count > vq->num) log_used(vq, 0, (start + count - vq->num) * sizeof *used); } old = vq->last_used_idx; vq->last_used_idx = new; /* If the driver never bothers to signal in a very long while, * used index might wrap around. If that happens, invalidate * signalled_used index we stored. TODO: make sure driver * signals at least once in 2^16 and remove this. */ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) vq->signalled_used_valid = false; return 0; } /* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. */ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, u16 *nheads, unsigned count) { bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); int r; if (!in_order || !nheads) r = vhost_add_used_n_ooo(vq, heads, count); else r = vhost_add_used_n_in_order(vq, heads, nheads, count); if (r < 0) return r; /* Make sure buffer is written before we update index. */ smp_wmb(); if (vhost_put_used_idx(vq)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } if (unlikely(vq->log_used)) { /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ log_used(vq, offsetof(struct vring_used, idx), sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return r; } EXPORT_SYMBOL_GPL(vhost_add_used_n); static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 old, new; __virtio16 event; bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && unlikely(vq->avail_idx == vq->last_avail_idx)) return true; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; if (vhost_get_avail_flags(vq, &flags)) { vq_err(vq, "Failed to get flags"); return true; } return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); } old = vq->signalled_used; v = vq->signalled_used_valid; new = vq->signalled_used = vq->last_used_idx; vq->signalled_used_valid = true; if (unlikely(!v)) return true; if (vhost_get_used_event(vq, &event)) { vq_err(vq, "Failed to get used event idx"); return true; } return vring_need_event(vhost16_to_cpu(vq, event), new, old); } /* This actually signals the guest, using eventfd. */ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ if (vq->call_ctx.ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx.ctx); } EXPORT_SYMBOL_GPL(vhost_signal); /* And here's the combo meal deal. Supersize me! */ void vhost_add_used_and_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned int head, int len) { vhost_add_used(vq, head, len); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); /* multi-buffer version of vhost_add_used_and_signal */ void vhost_add_used_and_signal_n(struct vhost_dev *dev, struct vhost_virtqueue *vq, struct vring_used_elem *heads, u16 *nheads, unsigned count) { vhost_add_used_n(vq, heads, nheads, count); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); /* return true if we're sure that available ring is empty */ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->avail_idx != vq->last_avail_idx) return false; r = vhost_get_avail_idx(vq); /* Note: we treat error as non-empty here */ return r == 0; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) { vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); return false; } } else { r = vhost_update_avail_event(vq); if (r) { vq_err(vq, "Failed to update avail event index at %p: %d\n", vhost_avail_event(vq), r); return false; } } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); r = vhost_get_avail_idx(vq); /* Note: we treat error as empty here */ if (unlikely(r < 0)) return false; return r; } EXPORT_SYMBOL_GPL(vhost_enable_notify); /* We don't need to be notified again. */ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) vq_err(vq, "Failed to disable notification at %p: %d\n", &vq->used->flags, r); } } EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { /* Make sure all padding within the structure is initialized. */ struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; node->vq = vq; node->msg.type = type; return node; } EXPORT_SYMBOL_GPL(vhost_new_msg); void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, struct vhost_msg_node *node) { spin_lock(&dev->iotlb_lock); list_add_tail(&node->node, head); spin_unlock(&dev->iotlb_lock); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); } EXPORT_SYMBOL_GPL(vhost_enqueue_msg); struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, struct list_head *head) { struct vhost_msg_node *node = NULL; spin_lock(&dev->iotlb_lock); if (!list_empty(head)) { node = list_first_entry(head, struct vhost_msg_node, node); list_del(&node->node); } spin_unlock(&dev->iotlb_lock); return node; } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); void vhost_set_backend_features(struct vhost_dev *dev, u64 features) { struct vhost_virtqueue *vq; int i; mutex_lock(&dev->mutex); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); vq->acked_backend_features = features; mutex_unlock(&vq->mutex); } mutex_unlock(&dev->mutex); } EXPORT_SYMBOL_GPL(vhost_set_backend_features); static int __init vhost_init(void) { return 0; } static void __exit vhost_exit(void) { } module_init(vhost_init); module_exit(vhost_exit); MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. Tsirkin"); MODULE_DESCRIPTION("Host kernel accelerator for virtio");
15 31 1 1 15 2 1 1 1 2 1 1 1 1 1 1 1 29 1 14 16 16 16 15 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 // SPDX-License-Identifier: GPL-2.0-or-later /* Linux driver for Philips webcam USB and Video4Linux interface part. (C) 1999-2004 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* This code forms the interface between the USB layers and the Philips specific stuff. Some adanved stuff of the driver falls under an NDA, signed between me and Philips B.V., Eindhoven, the Netherlands, and is thus not distributed in source form. The binary pwcx.o module contains the code that falls under the NDA. In case you're wondering: 'pwc' stands for "Philips WebCam", but I really didn't want to type 'philips_web_cam' every time (I'm lazy as any Linux kernel hacker, but I don't like uncomprehensible abbreviations without explanation). Oh yes, convention: to disctinguish between all the various pointers to device-structures, I use these names for the pointer variables: udev: struct usb_device * vdev: struct video_device (member of pwc_dev) pdev: struct pwc_devive * */ /* Contributors: - Alvarado: adding whitebalance code - Alistar Moire: QuickCam 3000 Pro device/product ID - Tony Hoyle: Creative Labs Webcam 5 device/product ID - Mark Burazin: solving hang in VIDIOCSYNC when camera gets unplugged - Jk Fang: Sotec Afina Eye ID - Xavier Roche: QuickCam Pro 4000 ID - Jens Knudsen: QuickCam Zoom ID - J. Debert: QuickCam for Notebooks ID - Pham Thanh Nam: webcam snapshot button as an event input device */ #include <linux/errno.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/slab.h> #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include <linux/usb/input.h> #endif #include <linux/vmalloc.h> #include <asm/io.h> #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec23.h" #include "pwc-dec1.h" #define CREATE_TRACE_POINTS #include <trace/events/pwc.h> /* Function prototypes and driver templates */ /* hotplug device table support */ static const struct usb_device_id pwc_device_table [] = { { USB_DEVICE(0x041E, 0x400C) }, /* Creative Webcam 5 */ { USB_DEVICE(0x041E, 0x4011) }, /* Creative Webcam Pro Ex */ { USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam 3000 Pro */ { USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */ { USB_DEVICE(0x046D, 0x08B2) }, /* Logitech QuickCam 4000 Pro */ { USB_DEVICE(0x046D, 0x08B3) }, /* Logitech QuickCam Zoom (old model) */ { USB_DEVICE(0x046D, 0x08B4) }, /* Logitech QuickCam Zoom (new model) */ { USB_DEVICE(0x046D, 0x08B5) }, /* Logitech QuickCam Orbit/Sphere */ { USB_DEVICE(0x046D, 0x08B6) }, /* Logitech/Cisco VT Camera */ { USB_DEVICE(0x046D, 0x08B7) }, /* Logitech ViewPort AV 100 */ { USB_DEVICE(0x046D, 0x08B8) }, /* Logitech QuickCam */ { USB_DEVICE(0x0471, 0x0302) }, /* Philips PCA645VC */ { USB_DEVICE(0x0471, 0x0303) }, /* Philips PCA646VC */ { USB_DEVICE(0x0471, 0x0304) }, /* Askey VC010 type 2 */ { USB_DEVICE(0x0471, 0x0307) }, /* Philips PCVC675K (Vesta) */ { USB_DEVICE(0x0471, 0x0308) }, /* Philips PCVC680K (Vesta Pro) */ { USB_DEVICE(0x0471, 0x030C) }, /* Philips PCVC690K (Vesta Pro Scan) */ { USB_DEVICE(0x0471, 0x0310) }, /* Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0311) }, /* Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0312) }, /* Philips PCVC750K (ToUCam Pro Scan) */ { USB_DEVICE(0x0471, 0x0313) }, /* Philips PCVC720K/40 (ToUCam XS) */ { USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC webcam */ { USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC webcam */ { USB_DEVICE(0x04CC, 0x8116) }, /* Sotec Afina Eye */ { USB_DEVICE(0x055D, 0x9000) }, /* Samsung MPC-C10 */ { USB_DEVICE(0x055D, 0x9001) }, /* Samsung MPC-C30 */ { USB_DEVICE(0x055D, 0x9002) }, /* Samsung SNC-35E (Ver3.0) */ { USB_DEVICE(0x069A, 0x0001) }, /* Askey VC010 type 1 */ { USB_DEVICE(0x06BE, 0x8116) }, /* AME Co. Afina Eye */ { USB_DEVICE(0x0d81, 0x1900) }, /* Visionite VCS-UC300 */ { USB_DEVICE(0x0d81, 0x1910) }, /* Visionite VCS-UM100 */ { } }; MODULE_DEVICE_TABLE(usb, pwc_device_table); static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id); static void usb_pwc_disconnect(struct usb_interface *intf); static void pwc_isoc_cleanup(struct pwc_device *pdev); static struct usb_driver pwc_driver = { .name = "Philips webcam", /* name */ .id_table = pwc_device_table, .probe = usb_pwc_probe, /* probe() */ .disconnect = usb_pwc_disconnect, /* disconnect() */ }; #define MAX_DEV_HINTS 20 #define MAX_ISOC_ERRORS 20 #ifdef CONFIG_USB_PWC_DEBUG int pwc_trace = PWC_DEBUG_LEVEL; #endif static int power_save = -1; static int leds[2] = { 100, 0 }; /***/ static const struct v4l2_file_operations pwc_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device pwc_template = { .name = "Philips Webcam", /* Filled in later */ .release = video_device_release_empty, .fops = &pwc_fops, .ioctl_ops = &pwc_ioctl_ops, }; /***************************************************************************/ /* Private functions */ static void *pwc_alloc_urb_buffer(struct usb_device *dev, size_t size, dma_addr_t *dma_handle) { struct device *dmadev = dev->bus->sysdev; void *buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return NULL; *dma_handle = dma_map_single(dmadev, buffer, size, DMA_FROM_DEVICE); if (dma_mapping_error(dmadev, *dma_handle)) { kfree(buffer); return NULL; } return buffer; } static void pwc_free_urb_buffer(struct usb_device *dev, size_t size, void *buffer, dma_addr_t dma_handle) { struct device *dmadev = dev->bus->sysdev; dma_unmap_single(dmadev, dma_handle, size, DMA_FROM_DEVICE); kfree(buffer); } static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev) { unsigned long flags = 0; struct pwc_frame_buf *buf = NULL; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); if (list_empty(&pdev->queued_bufs)) goto leave; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); leave: spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); return buf; } static void pwc_snapshot_button(struct pwc_device *pdev, int down) { if (down) { PWC_TRACE("Snapshot button pressed.\n"); } else { PWC_TRACE("Snapshot button released.\n"); } #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) { input_report_key(pdev->button_dev, KEY_CAMERA, down); input_sync(pdev->button_dev); } #endif } static void pwc_frame_complete(struct pwc_device *pdev) { struct pwc_frame_buf *fbuf = pdev->fill_buf; /* The ToUCam Fun CMOS sensor causes the firmware to send 2 or 3 bogus frames on the USB wire after an exposure change. This conditition is however detected in the cam and a bit is set in the header. */ if (pdev->type == 730) { unsigned char *ptr = (unsigned char *)fbuf->data; if (ptr[1] == 1 && ptr[0] & 0x10) { PWC_TRACE("Hyundai CMOS sensor bug. Dropping frame.\n"); pdev->drop_frames += 2; } if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } if ((ptr[0] ^ pdev->vmirror) & 0x02) { if (ptr[0] & 0x02) PWC_TRACE("Image is mirrored.\n"); else PWC_TRACE("Image is normal.\n"); } pdev->vmirror = ptr[0] & 0x03; /* Sometimes the trailer of the 730 is still sent as a 4 byte packet after a short frame; this condition is filtered out specifically. A 4 byte frame doesn't make sense anyway. So we get either this sequence: drop_bit set -> 4 byte frame -> short frame -> good frame Or this one: drop_bit set -> short frame -> good frame So we drop either 3 or 2 frames in all! */ if (fbuf->filled == 4) pdev->drop_frames++; } else if (pdev->type == 740 || pdev->type == 720) { unsigned char *ptr = (unsigned char *)fbuf->data; if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } pdev->vmirror = ptr[0] & 0x03; } /* In case we were instructed to drop the frame, do so silently. */ if (pdev->drop_frames > 0) { pdev->drop_frames--; } else { /* Check for underflow first */ if (fbuf->filled < pdev->frame_total_size) { PWC_DEBUG_FLOW("Frame buffer underflow (%d bytes); discarded.\n", fbuf->filled); } else { fbuf->vb.field = V4L2_FIELD_NONE; fbuf->vb.sequence = pdev->vframe_count; vb2_buffer_done(&fbuf->vb.vb2_buf, VB2_BUF_STATE_DONE); pdev->fill_buf = NULL; pdev->vsync = 0; } } /* !drop_frames */ pdev->vframe_count++; } /* This gets called for the Isochronous pipe (video). This is done in * interrupt time, so it has to be fast, not crash, and not stall. Neat. */ static void pwc_isoc_handler(struct urb *urb) { struct pwc_device *pdev = (struct pwc_device *)urb->context; struct device *dmadev = urb->dev->bus->sysdev; int i, fst, flen; unsigned char *iso_buf = NULL; trace_pwc_handler_enter(urb, pdev); if (urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN) { PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n", urb, urb->status == -ENOENT ? "" : "a"); return; } if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (urb->status != 0) { const char *errmsg; errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; case -EPIPE: errmsg = "Stalled (device not responding)"; break; case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout (could be anything)"; break; case -ETIME: errmsg = "Device does not respond"; break; } PWC_ERROR("pwc_isoc_handler() called with status %d [%s].\n", urb->status, errmsg); /* Give up after a number of contiguous errors */ if (++pdev->visoc_errors > MAX_ISOC_ERRORS) { PWC_ERROR("Too many ISOC errors, bailing out.\n"); if (pdev->fill_buf) { vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); pdev->fill_buf = NULL; } } pdev->vsync = 0; /* Drop the current frame */ goto handler_end; } /* Reset ISOC error counter. We did get here, after all. */ pdev->visoc_errors = 0; dma_sync_single_for_cpu(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); /* vsync: 0 = don't copy data 1 = sync-hunt 2 = synched */ /* Compact data */ for (i = 0; i < urb->number_of_packets; i++) { fst = urb->iso_frame_desc[i].status; flen = urb->iso_frame_desc[i].actual_length; iso_buf = urb->transfer_buffer + urb->iso_frame_desc[i].offset; if (fst != 0) { PWC_ERROR("Iso frame %d has error %d\n", i, fst); continue; } if (flen > 0 && pdev->vsync) { struct pwc_frame_buf *fbuf = pdev->fill_buf; if (pdev->vsync == 1) { fbuf->vb.vb2_buf.timestamp = ktime_get_ns(); pdev->vsync = 2; } if (flen + fbuf->filled > pdev->frame_total_size) { PWC_ERROR("Frame overflow (%d > %d)\n", flen + fbuf->filled, pdev->frame_total_size); pdev->vsync = 0; /* Let's wait for an EOF */ } else { memcpy(fbuf->data + fbuf->filled, iso_buf, flen); fbuf->filled += flen; } } if (flen < pdev->vlast_packet_size) { /* Shorter packet... end of frame */ if (pdev->vsync == 2) pwc_frame_complete(pdev); if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (pdev->fill_buf) { pdev->fill_buf->filled = 0; pdev->vsync = 1; } } pdev->vlast_packet_size = flen; } dma_sync_single_for_device(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); handler_end: trace_pwc_handler_exit(urb, pdev); i = usb_submit_urb(urb, GFP_ATOMIC); if (i != 0) PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i); } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static int pwc_isoc_init(struct pwc_device *pdev) { struct usb_device *udev; struct urb *urb; int i, j, ret; struct usb_interface *intf; struct usb_host_interface *idesc = NULL; int compression = 0; /* 0..3 = uncompressed..high */ pdev->vsync = 0; pdev->vlast_packet_size = 0; pdev->fill_buf = NULL; pdev->vframe_count = 0; pdev->visoc_errors = 0; udev = pdev->udev; retry: /* We first try with low compression and then retry with a higher compression setting if there is not enough bandwidth. */ ret = pwc_set_video_mode(pdev, pdev->width, pdev->height, pdev->pixfmt, pdev->vframes, &compression, 1); /* Get the current alternate interface, adjust packet size */ intf = usb_ifnum_to_if(udev, 0); if (intf) idesc = usb_altnum_to_altsetting(intf, pdev->valternate); if (!idesc) return -EIO; /* Search video endpoint */ pdev->vmax_packet_size = -1; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { if ((idesc->endpoint[i].desc.bEndpointAddress & 0xF) == pdev->vendpoint) { pdev->vmax_packet_size = le16_to_cpu(idesc->endpoint[i].desc.wMaxPacketSize); break; } } if (pdev->vmax_packet_size < 0 || pdev->vmax_packet_size > ISO_MAX_FRAME_SIZE) { PWC_ERROR("Failed to find packet size for video endpoint in current alternate setting.\n"); return -ENFILE; /* Odd error, that should be noticeable */ } /* Set alternate interface */ PWC_DEBUG_OPEN("Setting alternate interface %d\n", pdev->valternate); ret = usb_set_interface(pdev->udev, 0, pdev->valternate); if (ret == -ENOSPC && compression < 3) { compression++; goto retry; } if (ret < 0) return ret; /* Allocate and init Isochronuous urbs */ for (i = 0; i < MAX_ISO_BUFS; i++) { urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL); if (urb == NULL) { pwc_isoc_cleanup(pdev); return -ENOMEM; } pdev->urbs[i] = urb; PWC_DEBUG_MEMORY("Allocated URB at 0x%p\n", urb); urb->interval = 1; // devik urb->dev = udev; urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = ISO_BUFFER_SIZE; urb->transfer_buffer = pwc_alloc_urb_buffer(udev, urb->transfer_buffer_length, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { PWC_ERROR("Failed to allocate urb buffer %d\n", i); pwc_isoc_cleanup(pdev); return -ENOMEM; } urb->complete = pwc_isoc_handler; urb->context = pdev; urb->start_frame = 0; urb->number_of_packets = ISO_FRAMES_PER_DESC; for (j = 0; j < ISO_FRAMES_PER_DESC; j++) { urb->iso_frame_desc[j].offset = j * ISO_MAX_FRAME_SIZE; urb->iso_frame_desc[j].length = pdev->vmax_packet_size; } } /* link */ for (i = 0; i < MAX_ISO_BUFS; i++) { ret = usb_submit_urb(pdev->urbs[i], GFP_KERNEL); if (ret == -ENOSPC && compression < 3) { compression++; pwc_isoc_cleanup(pdev); goto retry; } if (ret) { PWC_ERROR("isoc_init() submit_urb %d failed with error %d\n", i, ret); pwc_isoc_cleanup(pdev); return ret; } PWC_DEBUG_MEMORY("URB 0x%p submitted.\n", pdev->urbs[i]); } /* All is done... */ PWC_DEBUG_OPEN("<< pwc_isoc_init()\n"); return 0; } static void pwc_iso_stop(struct pwc_device *pdev) { int i; /* Unlinking ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (pdev->urbs[i]) { PWC_DEBUG_MEMORY("Unlinking URB %p\n", pdev->urbs[i]); usb_kill_urb(pdev->urbs[i]); } } } static void pwc_iso_free(struct pwc_device *pdev) { int i; /* Freeing ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { struct urb *urb = pdev->urbs[i]; if (urb) { PWC_DEBUG_MEMORY("Freeing URB\n"); if (urb->transfer_buffer) pwc_free_urb_buffer(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); pdev->urbs[i] = NULL; } } } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static void pwc_isoc_cleanup(struct pwc_device *pdev) { PWC_DEBUG_OPEN(">> pwc_isoc_cleanup()\n"); pwc_iso_stop(pdev); pwc_iso_free(pdev); usb_set_interface(pdev->udev, 0, 0); PWC_DEBUG_OPEN("<< pwc_isoc_cleanup()\n"); } /* Must be called with vb_queue_lock hold */ static void pwc_cleanup_queued_bufs(struct pwc_device *pdev, enum vb2_buffer_state state) { unsigned long flags = 0; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); while (!list_empty(&pdev->queued_bufs)) { struct pwc_frame_buf *buf; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, state); } spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } #ifdef CONFIG_USB_PWC_DEBUG static const char *pwc_sensor_type_to_string(unsigned int sensor_type) { switch(sensor_type) { case 0x00: return "Hyundai CMOS sensor"; case 0x20: return "Sony CCD sensor + TDA8787"; case 0x2E: return "Sony CCD sensor + Exas 98L59"; case 0x2F: return "Sony CCD sensor + ADI 9804"; case 0x30: return "Sharp CCD sensor + TDA8787"; case 0x3E: return "Sharp CCD sensor + Exas 98L59"; case 0x3F: return "Sharp CCD sensor + ADI 9804"; case 0x40: return "UPA 1021 sensor"; case 0x100: return "VGA sensor"; case 0x101: return "PAL MR sensor"; default: return "unknown type of sensor"; } } #endif /***************************************************************************/ /* Video4Linux functions */ static void pwc_video_release(struct v4l2_device *v) { struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); v4l2_ctrl_handler_free(&pdev->ctrl_handler); v4l2_device_unregister(&pdev->v4l2_dev); kfree(pdev->ctrl_buf); kfree(pdev); } /***************************************************************************/ /* Videobuf2 operations */ static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int size; if (*nbuffers < MIN_FRAMES) *nbuffers = MIN_FRAMES; else if (*nbuffers > MAX_FRAMES) *nbuffers = MAX_FRAMES; *nplanes = 1; size = pwc_get_size(pdev, MAX_WIDTH, MAX_HEIGHT); sizes[0] = PAGE_ALIGN(pwc_image_sizes[size][0] * pwc_image_sizes[size][1] * 3 / 2); return 0; } static int buffer_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); /* need vmalloc since frame buffer > 128K */ buf->data = vzalloc(PWC_FRAME_SIZE); if (buf->data == NULL) return -ENOMEM; return 0; } static int buffer_prepare(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); /* Don't allow queueing new buffers after device disconnection */ if (!pdev->udev) return -ENODEV; return 0; } static void buffer_finish(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); if (vb->state == VB2_BUF_STATE_DONE) { /* * Application has called dqbuf and is getting back a buffer * we've filled, take the pwc data we've stored in buf->data * and decompress it into a usable format, storing the result * in the vb2_buffer. */ pwc_decompress(pdev, buf); } } static void buffer_cleanup(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); vfree(buf->data); } static void buffer_queue(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); unsigned long flags = 0; /* Check the device has not disconnected between prep and queuing */ if (!pdev->udev) { vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); return; } spin_lock_irqsave(&pdev->queued_bufs_lock, flags); list_add_tail(&buf->list, &pdev->queued_bufs); spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } static int start_streaming(struct vb2_queue *vq, unsigned int count) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int r; if (!pdev->udev) return -ENODEV; if (mutex_lock_interruptible(&pdev->v4l2_lock)) return -ERESTARTSYS; /* Turn on camera and set LEDS on */ pwc_camera_power(pdev, 1); pwc_set_leds(pdev, leds[0], leds[1]); r = pwc_isoc_init(pdev); if (r) { /* If we failed turn camera and LEDS back off */ pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); /* And cleanup any queued bufs!! */ pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_QUEUED); } mutex_unlock(&pdev->v4l2_lock); return r; } static void stop_streaming(struct vb2_queue *vq) { struct pwc_device *pdev = vb2_get_drv_priv(vq); mutex_lock(&pdev->v4l2_lock); if (pdev->udev) { pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); pwc_isoc_cleanup(pdev); } pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_ERROR); if (pdev->fill_buf) vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); mutex_unlock(&pdev->v4l2_lock); } static const struct vb2_ops pwc_vb_queue_ops = { .queue_setup = queue_setup, .buf_init = buffer_init, .buf_prepare = buffer_prepare, .buf_finish = buffer_finish, .buf_cleanup = buffer_cleanup, .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, }; /***************************************************************************/ /* USB functions */ /* This function gets called when a new device is plugged in or the usb core * is loaded. */ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct pwc_device *pdev = NULL; int vendor_id, product_id, type_id; int rc; int features = 0; int compression = 0; int my_power_save = power_save; char serial_number[30], *name; vendor_id = le16_to_cpu(udev->descriptor.idVendor); product_id = le16_to_cpu(udev->descriptor.idProduct); /* Check if we can handle this device */ PWC_DEBUG_PROBE("probe() called [%04X %04X], if %d\n", vendor_id, product_id, intf->altsetting->desc.bInterfaceNumber); /* the interfaces are probed one by one. We are only interested in the video interface (0) now. Interface 1 is the Audio Control, and interface 2 Audio itself. */ if (intf->altsetting->desc.bInterfaceNumber > 0) return -ENODEV; if (vendor_id == 0x0471) { switch (product_id) { case 0x0302: PWC_INFO("Philips PCA645VC USB webcam detected.\n"); name = "Philips 645 webcam"; type_id = 645; break; case 0x0303: PWC_INFO("Philips PCA646VC USB webcam detected.\n"); name = "Philips 646 webcam"; type_id = 646; break; case 0x0304: PWC_INFO("Askey VC010 type 2 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 646; break; case 0x0307: PWC_INFO("Philips PCVC675K (Vesta) USB webcam detected.\n"); name = "Philips 675 webcam"; type_id = 675; break; case 0x0308: PWC_INFO("Philips PCVC680K (Vesta Pro) USB webcam detected.\n"); name = "Philips 680 webcam"; type_id = 680; break; case 0x030C: PWC_INFO("Philips PCVC690K (Vesta Pro Scan) USB webcam detected.\n"); name = "Philips 690 webcam"; type_id = 690; break; case 0x0310: PWC_INFO("Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) USB webcam detected.\n"); name = "Philips 730 webcam"; type_id = 730; break; case 0x0311: PWC_INFO("Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) USB webcam detected.\n"); name = "Philips 740 webcam"; type_id = 740; break; case 0x0312: PWC_INFO("Philips PCVC750K (ToUCam Pro Scan) USB webcam detected.\n"); name = "Philips 750 webcam"; type_id = 750; break; case 0x0313: PWC_INFO("Philips PCVC720K/40 (ToUCam XS) USB webcam detected.\n"); name = "Philips 720K/40 webcam"; type_id = 720; break; case 0x0329: PWC_INFO("Philips SPC 900NC USB webcam detected.\n"); name = "Philips SPC 900NC webcam"; type_id = 740; break; case 0x032C: PWC_INFO("Philips SPC 880NC USB webcam detected.\n"); name = "Philips SPC 880NC webcam"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x069A) { switch(product_id) { case 0x0001: PWC_INFO("Askey VC010 type 1 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 645; break; default: return -ENODEV; } } else if (vendor_id == 0x046d) { switch(product_id) { case 0x08b0: PWC_INFO("Logitech QuickCam Pro 3000 USB webcam detected.\n"); name = "Logitech QuickCam Pro 3000"; type_id = 740; /* CCD sensor */ break; case 0x08b1: PWC_INFO("Logitech QuickCam Notebook Pro USB webcam detected.\n"); name = "Logitech QuickCam Notebook Pro"; type_id = 740; /* CCD sensor */ break; case 0x08b2: PWC_INFO("Logitech QuickCam 4000 Pro USB webcam detected.\n"); name = "Logitech QuickCam Pro 4000"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b3: PWC_INFO("Logitech QuickCam Zoom USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ break; case 0x08B4: PWC_INFO("Logitech QuickCam Zoom (new model) USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b5: PWC_INFO("Logitech QuickCam Orbit/Sphere USB webcam detected.\n"); name = "Logitech QuickCam Orbit"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; features |= FEATURE_MOTOR_PANTILT; break; case 0x08b6: PWC_INFO("Logitech/Cisco VT Camera webcam detected.\n"); name = "Cisco VT Camera"; type_id = 740; /* CCD sensor */ break; case 0x08b7: PWC_INFO("Logitech ViewPort AV 100 webcam detected.\n"); name = "Logitech ViewPort AV 100"; type_id = 740; /* CCD sensor */ break; case 0x08b8: /* Where this released? */ PWC_INFO("Logitech QuickCam detected (reserved ID).\n"); name = "Logitech QuickCam (res.)"; type_id = 730; /* Assuming CMOS */ break; default: return -ENODEV; } } else if (vendor_id == 0x055d) { /* I don't know the difference between the C10 and the C30; I suppose the difference is the sensor, but both cameras work equally well with a type_id of 675 */ switch(product_id) { case 0x9000: PWC_INFO("Samsung MPC-C10 USB webcam detected.\n"); name = "Samsung MPC-C10"; type_id = 675; break; case 0x9001: PWC_INFO("Samsung MPC-C30 USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 675; break; case 0x9002: PWC_INFO("Samsung SNC-35E (v3.0) USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x041e) { switch(product_id) { case 0x400c: PWC_INFO("Creative Labs Webcam 5 detected.\n"); name = "Creative Labs Webcam 5"; type_id = 730; if (my_power_save == -1) my_power_save = 1; break; case 0x4011: PWC_INFO("Creative Labs Webcam Pro Ex detected.\n"); name = "Creative Labs Webcam Pro Ex"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x04cc) { switch(product_id) { case 0x8116: PWC_INFO("Sotec Afina Eye USB webcam detected.\n"); name = "Sotec Afina Eye"; type_id = 730; break; default: return -ENODEV; } } else if (vendor_id == 0x06be) { switch(product_id) { case 0x8116: /* This is essentially the same cam as the Sotec Afina Eye */ PWC_INFO("AME Co. Afina Eye USB webcam detected.\n"); name = "AME Co. Afina Eye"; type_id = 750; break; default: return -ENODEV; } } else if (vendor_id == 0x0d81) { switch(product_id) { case 0x1900: PWC_INFO("Visionite VCS-UC300 USB webcam detected.\n"); name = "Visionite VCS-UC300"; type_id = 740; /* CCD sensor */ break; case 0x1910: PWC_INFO("Visionite VCS-UM100 USB webcam detected.\n"); name = "Visionite VCS-UM100"; type_id = 730; /* CMOS sensor */ break; default: return -ENODEV; } } else return -ENODEV; /* Not any of the know types; but the list keeps growing. */ if (my_power_save == -1) my_power_save = 0; memset(serial_number, 0, 30); usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); PWC_DEBUG_PROBE("Device serial number is %s\n", serial_number); if (udev->descriptor.bNumConfigurations > 1) PWC_WARNING("Warning: more than 1 configuration available.\n"); /* Allocate structure, initialize pointers, mutexes, etc. and link it to the usb_device */ pdev = kzalloc(sizeof(struct pwc_device), GFP_KERNEL); if (pdev == NULL) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); return -ENOMEM; } pdev->type = type_id; pdev->features = features; pwc_construct(pdev); /* set min/max sizes correct */ mutex_init(&pdev->v4l2_lock); mutex_init(&pdev->vb_queue_lock); spin_lock_init(&pdev->queued_bufs_lock); INIT_LIST_HEAD(&pdev->queued_bufs); pdev->udev = udev; pdev->power_save = my_power_save; /* Init videobuf2 queue structure */ pdev->vb_queue.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; pdev->vb_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_READ; pdev->vb_queue.drv_priv = pdev; pdev->vb_queue.buf_struct_size = sizeof(struct pwc_frame_buf); pdev->vb_queue.ops = &pwc_vb_queue_ops; pdev->vb_queue.mem_ops = &vb2_vmalloc_memops; pdev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; pdev->vb_queue.lock = &pdev->vb_queue_lock; rc = vb2_queue_init(&pdev->vb_queue); if (rc < 0) { PWC_ERROR("Oops, could not initialize vb2 queue.\n"); goto err_free_mem; } /* Init video_device structure */ pdev->vdev = pwc_template; strscpy(pdev->vdev.name, name, sizeof(pdev->vdev.name)); pdev->vdev.queue = &pdev->vb_queue; video_set_drvdata(&pdev->vdev, pdev); pdev->release = le16_to_cpu(udev->descriptor.bcdDevice); PWC_DEBUG_PROBE("Release: %04x\n", pdev->release); /* Allocate USB command buffers */ pdev->ctrl_buf = kmalloc(sizeof(pdev->cmd_buf), GFP_KERNEL); if (!pdev->ctrl_buf) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); rc = -ENOMEM; goto err_free_mem; } #ifdef CONFIG_USB_PWC_DEBUG /* Query sensor type */ if (pwc_get_cmos_sensor(pdev, &rc) >= 0) { PWC_DEBUG_OPEN("This %s camera is equipped with a %s (%d).\n", pdev->vdev.name, pwc_sensor_type_to_string(rc), rc); } #endif /* Set the leds off */ pwc_set_leds(pdev, 0, 0); /* Setup initial videomode */ rc = pwc_set_video_mode(pdev, MAX_WIDTH, MAX_HEIGHT, V4L2_PIX_FMT_YUV420, 30, &compression, 1); if (rc) goto err_free_mem; /* Register controls (and read default values from camera */ rc = pwc_init_controls(pdev); if (rc) { PWC_ERROR("Failed to register v4l2 controls (%d).\n", rc); goto err_free_mem; } /* And powerdown the camera until streaming starts */ pwc_camera_power(pdev, 0); /* Register the v4l2_device structure */ pdev->v4l2_dev.release = pwc_video_release; rc = v4l2_device_register(&intf->dev, &pdev->v4l2_dev); if (rc) { PWC_ERROR("Failed to register v4l2-device (%d).\n", rc); goto err_free_controls; } pdev->v4l2_dev.ctrl_handler = &pdev->ctrl_handler; pdev->vdev.v4l2_dev = &pdev->v4l2_dev; pdev->vdev.lock = &pdev->v4l2_lock; pdev->vdev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; rc = video_register_device(&pdev->vdev, VFL_TYPE_VIDEO, -1); if (rc < 0) { PWC_ERROR("Failed to register as video device (%d).\n", rc); goto err_unregister_v4l2_dev; } PWC_INFO("Registered as %s.\n", video_device_node_name(&pdev->vdev)); #ifdef CONFIG_USB_PWC_INPUT_EVDEV /* register webcam snapshot button input device */ pdev->button_dev = input_allocate_device(); if (!pdev->button_dev) { rc = -ENOMEM; goto err_video_unreg; } usb_make_path(udev, pdev->button_phys, sizeof(pdev->button_phys)); strlcat(pdev->button_phys, "/input0", sizeof(pdev->button_phys)); pdev->button_dev->name = "PWC snapshot button"; pdev->button_dev->phys = pdev->button_phys; usb_to_input_id(pdev->udev, &pdev->button_dev->id); pdev->button_dev->dev.parent = &pdev->udev->dev; pdev->button_dev->evbit[0] = BIT_MASK(EV_KEY); pdev->button_dev->keybit[BIT_WORD(KEY_CAMERA)] = BIT_MASK(KEY_CAMERA); rc = input_register_device(pdev->button_dev); if (rc) { input_free_device(pdev->button_dev); pdev->button_dev = NULL; goto err_video_unreg; } #endif return 0; #ifdef CONFIG_USB_PWC_INPUT_EVDEV err_video_unreg: video_unregister_device(&pdev->vdev); #endif err_unregister_v4l2_dev: v4l2_device_unregister(&pdev->v4l2_dev); err_free_controls: v4l2_ctrl_handler_free(&pdev->ctrl_handler); err_free_mem: kfree(pdev->ctrl_buf); kfree(pdev); return rc; } /* The user yanked out the cable... */ static void usb_pwc_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); mutex_lock(&pdev->vb_queue_lock); mutex_lock(&pdev->v4l2_lock); /* No need to keep the urbs around after disconnection */ if (pdev->vb_queue.streaming) pwc_isoc_cleanup(pdev); pdev->udev = NULL; v4l2_device_disconnect(&pdev->v4l2_dev); video_unregister_device(&pdev->vdev); mutex_unlock(&pdev->v4l2_lock); mutex_unlock(&pdev->vb_queue_lock); #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) input_unregister_device(pdev->button_dev); #endif v4l2_device_put(&pdev->v4l2_dev); } /* * Initialization code & module stuff */ static unsigned int leds_nargs; #ifdef CONFIG_USB_PWC_DEBUG module_param_named(trace, pwc_trace, int, 0644); #endif module_param(power_save, int, 0644); module_param_array(leds, int, &leds_nargs, 0444); #ifdef CONFIG_USB_PWC_DEBUG MODULE_PARM_DESC(trace, "For debugging purposes"); #endif MODULE_PARM_DESC(power_save, "Turn power saving for new cameras on or off"); MODULE_PARM_DESC(leds, "LED on,off time in milliseconds"); MODULE_DESCRIPTION("Philips & OEM USB webcam driver"); MODULE_AUTHOR("Luc Saillard <luc@saillard.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS("pwcx"); MODULE_VERSION( PWC_VERSION ); module_usb_driver(pwc_driver);
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 // SPDX-License-Identifier: GPL-2.0-or-later /* * DSA tagging protocol handling * * Copyright (c) 2008-2009 Marvell Semiconductor * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch> */ #include <linux/netdevice.h> #include <linux/ptp_classify.h> #include <linux/skbuff.h> #include <net/dsa.h> #include <net/dst_metadata.h> #include "tag.h" #include "user.h" static LIST_HEAD(dsa_tag_drivers_list); static DEFINE_MUTEX(dsa_tag_drivers_lock); /* Determine if we should defer delivery of skb until we have a rx timestamp. * * Called from dsa_switch_rcv. For now, this will only work if tagging is * enabled on the switch. Normally the MAC driver would retrieve the hardware * timestamp when it reads the packet out of the hardware. However in a DSA * switch, the DSA driver owning the interface to which the packet is * delivered is never notified unless we do so here. */ static bool dsa_skb_defer_rx_timestamp(struct dsa_user_priv *p, struct sk_buff *skb) { struct dsa_switch *ds = p->dp->ds; unsigned int type; if (!ds->ops->port_rxtstamp) return false; if (skb_headroom(skb) < ETH_HLEN) return false; __skb_push(skb, ETH_HLEN); type = ptp_classify_raw(skb); __skb_pull(skb, ETH_HLEN); if (type == PTP_CLASS_NONE) return false; return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type); } static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; struct dsa_user_priv *p; if (unlikely(!cpu_dp)) { kfree_skb(skb); return 0; } skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) return 0; if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { unsigned int port = md_dst->u.port_info.port_id; skb_dst_drop(skb); if (!skb_has_extensions(skb)) skb->slow_gro = 0; skb->dev = dsa_conduit_find_user(dev, 0, port); if (likely(skb->dev)) { dsa_default_offload_fwd_mark(skb); nskb = skb; } } else { nskb = cpu_dp->rcv(skb, dev); } if (!nskb) { kfree_skb(skb); return 0; } skb = nskb; skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); if (unlikely(!dsa_user_dev_check(skb->dev))) { /* Packet is to be injected directly on an upper * device, e.g. a team/bond, so skip all DSA-port * specific actions. */ netif_rx(skb); return 0; } p = netdev_priv(skb->dev); if (unlikely(cpu_dp->ds->untag_bridge_pvid || cpu_dp->ds->untag_vlan_aware_bridge_pvid)) { nskb = dsa_software_vlan_untag(skb); if (!nskb) { kfree_skb(skb); return 0; } skb = nskb; } dev_sw_netstats_rx_add(skb->dev, skb->len + ETH_HLEN); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; gro_cells_receive(&p->gcells, skb); return 0; } struct packet_type dsa_pack_type __read_mostly = { .type = cpu_to_be16(ETH_P_XDSA), .func = dsa_switch_rcv, }; static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver, struct module *owner) { dsa_tag_driver->owner = owner; mutex_lock(&dsa_tag_drivers_lock); list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list); mutex_unlock(&dsa_tag_drivers_lock); } void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], unsigned int count, struct module *owner) { unsigned int i; for (i = 0; i < count; i++) dsa_tag_driver_register(dsa_tag_driver_array[i], owner); } static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver) { mutex_lock(&dsa_tag_drivers_lock); list_del(&dsa_tag_driver->list); mutex_unlock(&dsa_tag_drivers_lock); } EXPORT_SYMBOL_GPL(dsa_tag_drivers_register); void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], unsigned int count) { unsigned int i; for (i = 0; i < count; i++) dsa_tag_driver_unregister(dsa_tag_driver_array[i]); } EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) { return ops->name; }; /* Function takes a reference on the module owning the tagger, * so dsa_tag_driver_put must be called afterwards. */ const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name) { const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT); struct dsa_tag_driver *dsa_tag_driver; request_module("%s%s", DSA_TAG_DRIVER_ALIAS, name); mutex_lock(&dsa_tag_drivers_lock); list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { const struct dsa_device_ops *tmp = dsa_tag_driver->ops; if (strcmp(name, tmp->name)) continue; if (!try_module_get(dsa_tag_driver->owner)) break; ops = tmp; break; } mutex_unlock(&dsa_tag_drivers_lock); return ops; } const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol) { struct dsa_tag_driver *dsa_tag_driver; const struct dsa_device_ops *ops; bool found = false; request_module("%sid-%d", DSA_TAG_DRIVER_ALIAS, tag_protocol); mutex_lock(&dsa_tag_drivers_lock); list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { ops = dsa_tag_driver->ops; if (ops->proto == tag_protocol) { found = true; break; } } if (found) { if (!try_module_get(dsa_tag_driver->owner)) ops = ERR_PTR(-ENOPROTOOPT); } else { ops = ERR_PTR(-ENOPROTOOPT); } mutex_unlock(&dsa_tag_drivers_lock); return ops; } void dsa_tag_driver_put(const struct dsa_device_ops *ops) { struct dsa_tag_driver *dsa_tag_driver; mutex_lock(&dsa_tag_drivers_lock); list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { if (dsa_tag_driver->ops == ops) { module_put(dsa_tag_driver->owner); break; } } mutex_unlock(&dsa_tag_drivers_lock); }
9 9 9 4 4 4 5 5 2 2 1 13 8 11 12 6 8 8 13 6 8 8 9 4 13 7 9 12 13 13 13 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "core_priv.h" #include <linux/slab.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/netdevice.h> #include <linux/ethtool.h> #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> #include <rdma/ib_cache.h> #include <rdma/rdma_counter.h> #include <rdma/ib_sysfs.h> struct port_table_attribute { struct ib_port_attribute attr; char name[8]; int index; __be16 attr_id; }; struct gid_attr_group { struct ib_port *port; struct kobject kobj; struct attribute_group groups[2]; const struct attribute_group *groups_list[3]; struct port_table_attribute attrs_list[]; }; struct ib_port { struct kobject kobj; struct ib_device *ibdev; struct gid_attr_group *gid_attr_group; struct hw_stats_port_data *hw_stats_data; struct attribute_group groups[3]; const struct attribute_group *groups_list[5]; u32 port_num; struct port_table_attribute attrs_list[]; }; struct hw_stats_device_attribute { struct device_attribute attr; ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, char *buf); ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, const char *buf, size_t count); }; struct hw_stats_port_attribute { struct ib_port_attribute attr; ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, char *buf); ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, const char *buf, size_t count); }; struct hw_stats_device_data { struct attribute_group group; struct rdma_hw_stats *stats; struct hw_stats_device_attribute attrs[]; }; struct hw_stats_port_data { struct rdma_hw_stats *stats; struct hw_stats_port_attribute attrs[]; }; static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct ib_port_attribute *port_attr = container_of(attr, struct ib_port_attribute, attr); struct ib_port *p = container_of(kobj, struct ib_port, kobj); if (!port_attr->show) return -EIO; return port_attr->show(p->ibdev, p->port_num, port_attr, buf); } static ssize_t port_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct ib_port_attribute *port_attr = container_of(attr, struct ib_port_attribute, attr); struct ib_port *p = container_of(kobj, struct ib_port, kobj); if (!port_attr->store) return -EIO; return port_attr->store(p->ibdev, p->port_num, port_attr, buf, count); } struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj, u32 *port_num) { struct ib_port *port = container_of(kobj, struct ib_port, kobj); *port_num = port->port_num; return port->ibdev; } EXPORT_SYMBOL(ib_port_sysfs_get_ibdev_kobj); static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show, .store = port_attr_store }; static ssize_t hw_stat_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hw_stats_device_attribute *stat_attr = container_of(attr, struct hw_stats_device_attribute, attr); struct ib_device *ibdev = container_of(dev, struct ib_device, dev); return stat_attr->show(ibdev, ibdev->hw_stats_data->stats, stat_attr - ibdev->hw_stats_data->attrs, 0, buf); } static ssize_t hw_stat_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hw_stats_device_attribute *stat_attr = container_of(attr, struct hw_stats_device_attribute, attr); struct ib_device *ibdev = container_of(dev, struct ib_device, dev); return stat_attr->store(ibdev, ibdev->hw_stats_data->stats, stat_attr - ibdev->hw_stats_data->attrs, 0, buf, count); } static ssize_t hw_stat_port_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { struct hw_stats_port_attribute *stat_attr = container_of(attr, struct hw_stats_port_attribute, attr); struct ib_port *port = ibdev->port_data[port_num].sysfs; return stat_attr->show(ibdev, port->hw_stats_data->stats, stat_attr - port->hw_stats_data->attrs, port->port_num, buf); } static ssize_t hw_stat_port_store(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, const char *buf, size_t count) { struct hw_stats_port_attribute *stat_attr = container_of(attr, struct hw_stats_port_attribute, attr); struct ib_port *port = ibdev->port_data[port_num].sysfs; return stat_attr->store(ibdev, port->hw_stats_data->stats, stat_attr - port->hw_stats_data->attrs, port->port_num, buf, count); } static ssize_t gid_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct ib_port_attribute *port_attr = container_of(attr, struct ib_port_attribute, attr); struct ib_port *p = container_of(kobj, struct gid_attr_group, kobj)->port; if (!port_attr->show) return -EIO; return port_attr->show(p->ibdev, p->port_num, port_attr, buf); } static const struct sysfs_ops gid_attr_sysfs_ops = { .show = gid_attr_show }; static ssize_t state_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "%d: %s\n", attr.state, ib_port_state_to_str(attr.state)); } static ssize_t lid_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "%u\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "%u\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; char *speed = ""; int rate; /* in deci-Gb/sec */ ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; switch (attr.active_speed) { case IB_SPEED_DDR: speed = " DDR"; rate = 50; break; case IB_SPEED_QDR: speed = " QDR"; rate = 100; break; case IB_SPEED_FDR10: speed = " FDR10"; rate = 100; break; case IB_SPEED_FDR: speed = " FDR"; rate = 140; break; case IB_SPEED_EDR: speed = " EDR"; rate = 250; break; case IB_SPEED_HDR: speed = " HDR"; rate = 500; break; case IB_SPEED_NDR: speed = " NDR"; rate = 1000; break; case IB_SPEED_XDR: speed = " XDR"; rate = 2000; break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ speed = " SDR"; rate = 25; break; } rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, rate % 10 ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } static const char *phys_state_to_str(enum ib_port_phys_state phys_state) { static const char *phys_state_str[] = { "<unknown>", "Sleep", "Polling", "Disabled", "PortConfigurationTraining", "LinkUp", "LinkErrorRecovery", "Phy Test", }; if (phys_state < ARRAY_SIZE(phys_state_str)) return phys_state_str[phys_state]; return "<unknown>"; } static ssize_t phys_state_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(ibdev, port_num, &attr); if (ret) return ret; return sysfs_emit(buf, "%u: %s\n", attr.phys_state, phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *unused, char *buf) { const char *output; switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: output = "InfiniBand"; break; case IB_LINK_LAYER_ETHERNET: output = "Ethernet"; break; default: output = "Unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static IB_PORT_ATTR_RO(state); static IB_PORT_ATTR_RO(lid); static IB_PORT_ATTR_RO(lid_mask_count); static IB_PORT_ATTR_RO(sm_lid); static IB_PORT_ATTR_RO(sm_sl); static IB_PORT_ATTR_RO(cap_mask); static IB_PORT_ATTR_RO(rate); static IB_PORT_ATTR_RO(phys_state); static IB_PORT_ATTR_RO(link_layer); static struct attribute *port_default_attrs[] = { &ib_port_attr_state.attr, &ib_port_attr_lid.attr, &ib_port_attr_lid_mask_count.attr, &ib_port_attr_sm_lid.attr, &ib_port_attr_sm_sl.attr, &ib_port_attr_cap_mask.attr, &ib_port_attr_rate.attr, &ib_port_attr_phys_state.attr, &ib_port_attr_link_layer.attr, NULL }; ATTRIBUTE_GROUPS(port_default); static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { struct net_device *ndev; int ret = -EINVAL; rcu_read_lock(); ndev = rcu_dereference(gid_attr->ndev); if (ndev) ret = sysfs_emit(buf, "%s\n", ndev->name); rcu_read_unlock(); return ret; } static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr( struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf, ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; ssize_t ret; gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index); if (IS_ERR(gid_attr)) /* -EINVAL is returned for user space compatibility reasons. */ return -EINVAL; ret = print(gid_attr, buf); rdma_put_gid_attr(gid_attr); return ret; } static ssize_t show_port_gid(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; int len; gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index); if (IS_ERR(gid_attr)) { const union ib_gid zgid = {}; /* If reading GID fails, it is likely due to GID entry being * empty (invalid) or reserved GID in the table. User space * expects to read GID table entries as long as it given index * is within GID table size. Administrative/debugging tool * fails to query rest of the GID entries if it hits error * while querying a GID of the given index. To avoid user * space throwing such error on fail to read gid, return zero * GID as before. This maintains backward compatibility. */ return sysfs_emit(buf, "%pI6\n", zgid.raw); } len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); rdma_put_gid_attr(gid_attr); return len; } static ssize_t show_port_gid_attr_ndev(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { return _show_port_gid_attr(ibdev, port_num, attr, buf, print_ndev); } static ssize_t show_port_gid_attr_gid_type(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { return _show_port_gid_attr(ibdev, port_num, attr, buf, print_gid_type); } static ssize_t show_port_pkey(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; int ret; ret = ib_query_pkey(ibdev, port_num, tab_attr->index, &pkey); if (ret) return ret; return sysfs_emit(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ struct port_table_attribute port_pma_attr_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \ .attr_id = IB_PMA_PORT_COUNTERS, \ } #define PORT_PMA_ATTR_EXT(_name, _width, _offset) \ struct port_table_attribute port_pma_attr_ext_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ .index = (_offset) | ((_width) << 16), \ .attr_id = IB_PMA_PORT_COUNTERS_EXT, \ } /* * Get a Perfmgmt MAD block of data. * Returns error code or the number of bytes retrieved. */ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, void *data, int offset, size_t size) { struct ib_mad *in_mad; struct ib_mad *out_mad; size_t mad_size = sizeof(*out_mad); u16 out_mad_pkey_index = 0; ssize_t ret; if (!dev->ops.process_mad) return -ENOSYS; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; } in_mad->mad_hdr.base_version = 1; in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; in_mad->mad_hdr.class_version = 1; in_mad->mad_hdr.method = IB_MGMT_METHOD_GET; in_mad->mad_hdr.attr_id = attr; if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, in_mad, out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; goto out; } memcpy(data, out_mad->data + offset, size); ret = size; out: kfree(in_mad); kfree(out_mad); return ret; } static ssize_t show_pma_counter(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; int ret; u8 data[8]; int len; ret = get_perf_mad(ibdev, port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); if (ret < 0) return ret; switch (width) { case 4: len = sysfs_emit(buf, "%d\n", (*data >> (4 - (offset % 8))) & 0xf); break; case 8: len = sysfs_emit(buf, "%u\n", *data); break; case 16: len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); break; default: len = 0; break; } return len; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48); static PORT_PMA_ATTR(link_downed , 2, 8, 56); static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64); static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80); static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96); static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112); static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128); static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136); static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152); static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156); static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176); static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192); static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224); static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320); /* * Counters added by extended set */ static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64); static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128); static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192); static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256); static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320); static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384); static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448); static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512); static struct attribute *pma_attrs[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_port_xmit_data.attr.attr, &port_pma_attr_port_rcv_data.attr.attr, &port_pma_attr_port_xmit_packets.attr.attr, &port_pma_attr_port_rcv_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, NULL }; static struct attribute *pma_attrs_ext[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_ext_port_xmit_data.attr.attr, &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_xmit_packets.attr.attr, &port_pma_attr_ext_multicast_rcv_packets.attr.attr, &port_pma_attr_ext_multicast_xmit_packets.attr.attr, NULL }; static struct attribute *pma_attrs_noietf[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_ext_port_xmit_data.attr.attr, &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, NULL }; static const struct attribute_group pma_group = { .name = "counters", .attrs = pma_attrs }; static const struct attribute_group pma_group_ext = { .name = "counters", .attrs = pma_attrs_ext }; static const struct attribute_group pma_group_noietf = { .name = "counters", .attrs = pma_attrs_noietf }; static void ib_port_release(struct kobject *kobj) { struct ib_port *port = container_of(kobj, struct ib_port, kobj); int i; for (i = 0; i != ARRAY_SIZE(port->groups); i++) kfree(port->groups[i].attrs); if (port->hw_stats_data) rdma_free_hw_stats_struct(port->hw_stats_data->stats); kfree(port->hw_stats_data); kvfree(port); } static void ib_port_gid_attr_release(struct kobject *kobj) { struct gid_attr_group *gid_attr_group = container_of(kobj, struct gid_attr_group, kobj); int i; for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++) kfree(gid_attr_group->groups[i].attrs); kfree(gid_attr_group); } static struct kobj_type port_type = { .release = ib_port_release, .sysfs_ops = &port_sysfs_ops, .default_groups = port_default_groups, }; static struct kobj_type gid_attr_type = { .sysfs_ops = &gid_attr_sysfs_ops, .release = ib_port_gid_attr_release }; /* * Figure out which counter table to use depending on * the device capabilities. */ static const struct attribute_group *get_counter_table(struct ib_device *dev, int port_num) { struct ib_class_port_info cpi; if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO, &cpi, 40, sizeof(cpi)) >= 0) { if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH) /* We have extended counters */ return &pma_group_ext; if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF) /* But not the IETF ones */ return &pma_group_noietf; } /* Fall back to normal counters */ return &pma_group; } static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, u32 port_num, int index) { int ret; if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) return 0; ret = dev->ops.get_hw_stats(dev, stats, port_num, index); if (ret < 0) return ret; if (ret == stats->num_counters) stats->timestamp = jiffies; return 0; } static int print_hw_stat(struct ib_device *dev, int port_num, struct rdma_hw_stats *stats, int index, char *buf) { u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); return sysfs_emit(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, char *buf) { int ret; mutex_lock(&stats->lock); ret = update_hw_stats(ibdev, stats, port_num, index); if (ret) goto unlock; ret = print_hw_stat(ibdev, port_num, stats, index, buf); unlock: mutex_unlock(&stats->lock); return ret; } static ssize_t show_stats_lifespan(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, char *buf) { int msecs; mutex_lock(&stats->lock); msecs = jiffies_to_msecs(stats->lifespan); mutex_unlock(&stats->lock); return sysfs_emit(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct ib_device *ibdev, struct rdma_hw_stats *stats, unsigned int index, unsigned int port_num, const char *buf, size_t count) { int msecs; int jiffies; int ret; ret = kstrtoint(buf, 10, &msecs); if (ret) return ret; if (msecs < 0 || msecs > 10000) return -EINVAL; jiffies = msecs_to_jiffies(msecs); mutex_lock(&stats->lock); stats->lifespan = jiffies; mutex_unlock(&stats->lock); return count; } static struct hw_stats_device_data * alloc_hw_stats_device(struct ib_device *ibdev) { struct hw_stats_device_data *data; struct rdma_hw_stats *stats; if (!ibdev->ops.alloc_hw_device_stats) return ERR_PTR(-EOPNOTSUPP); stats = ibdev->ops.alloc_hw_device_stats(ibdev); if (!stats) return ERR_PTR(-ENOMEM); if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* * Two extra attribue elements here, one for the lifespan entry and * one to NULL terminate the list for the sysfs core code */ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)), GFP_KERNEL); if (!data) goto err_free_stats; data->group.attrs = kcalloc(stats->num_counters + 2, sizeof(*data->group.attrs), GFP_KERNEL); if (!data->group.attrs) goto err_free_data; data->group.name = "hw_counters"; data->stats = stats; return data; err_free_data: kfree(data); err_free_stats: rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } void ib_device_release_hw_stats(struct hw_stats_device_data *data) { kfree(data->group.attrs); rdma_free_hw_stats_struct(data->stats); kfree(data); } int ib_setup_device_attrs(struct ib_device *ibdev) { struct hw_stats_device_attribute *attr; struct hw_stats_device_data *data; bool opstat_skipped = false; int i, ret, pos = 0; data = alloc_hw_stats_device(ibdev); if (IS_ERR(data)) { if (PTR_ERR(data) == -EOPNOTSUPP) return 0; return PTR_ERR(data); } ibdev->hw_stats_data = data; ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0, data->stats->num_counters); if (ret != data->stats->num_counters) { if (WARN_ON(ret >= 0)) return -EINVAL; return ret; } data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { opstat_skipped = true; continue; } WARN_ON(opstat_skipped); attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; data->group.attrs[pos] = &attr->attr.attr; pos++; } attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; attr->attr.show = hw_stat_device_show; attr->show = show_stats_lifespan; attr->attr.store = hw_stat_device_store; attr->store = set_stats_lifespan; data->group.attrs[pos] = &attr->attr.attr; for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) if (!ibdev->groups[i]) { ibdev->groups[i] = &data->group; ibdev->hw_stats_attr_index = i; return 0; } WARN(true, "struct ib_device->groups is too small"); return -EINVAL; } static struct hw_stats_port_data * alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) { struct ib_device *ibdev = port->ibdev; struct hw_stats_port_data *data; struct rdma_hw_stats *stats; if (!ibdev->ops.alloc_hw_port_stats) return ERR_PTR(-EOPNOTSUPP); stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num); if (!stats) return ERR_PTR(-ENOMEM); if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* * Two extra attribue elements here, one for the lifespan entry and * one to NULL terminate the list for the sysfs core code */ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)), GFP_KERNEL); if (!data) goto err_free_stats; group->attrs = kcalloc(stats->num_counters + 2, sizeof(*group->attrs), GFP_KERNEL); if (!group->attrs) goto err_free_data; group->name = "hw_counters"; data->stats = stats; return data; err_free_data: kfree(data); err_free_stats: rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } static int setup_hw_port_stats(struct ib_port *port, struct attribute_group *group) { struct hw_stats_port_attribute *attr; struct hw_stats_port_data *data; bool opstat_skipped = false; int i, ret, pos = 0; data = alloc_hw_stats_port(port, group); if (IS_ERR(data)) return PTR_ERR(data); ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats, port->port_num, data->stats->num_counters); if (ret != data->stats->num_counters) { if (WARN_ON(ret >= 0)) return -EINVAL; return ret; } data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { opstat_skipped = true; continue; } WARN_ON(opstat_skipped); attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; group->attrs[pos] = &attr->attr.attr; pos++; } attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; attr->attr.show = hw_stat_port_show; attr->show = show_stats_lifespan; attr->attr.store = hw_stat_port_store; attr->store = set_stats_lifespan; group->attrs[pos] = &attr->attr.attr; port->hw_stats_data = data; return 0; } struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev, u32 port_num) { if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num) || !ibdev->port_data[port_num].sysfs->hw_stats_data) return NULL; return ibdev->port_data[port_num].sysfs->hw_stats_data->stats; } static int alloc_port_table_group(const char *name, struct attribute_group *group, struct port_table_attribute *attrs, size_t num, ssize_t (*show)(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *, char *buf)) { struct attribute **attr_list; int i; attr_list = kcalloc(num + 1, sizeof(*attr_list), GFP_KERNEL); if (!attr_list) return -ENOMEM; for (i = 0; i < num; i++) { struct port_table_attribute *element = &attrs[i]; if (snprintf(element->name, sizeof(element->name), "%d", i) >= sizeof(element->name)) goto err; sysfs_attr_init(&element->attr.attr); element->attr.attr.name = element->name; element->attr.attr.mode = 0444; element->attr.show = show; element->index = i; attr_list[i] = &element->attr.attr; } group->name = name; group->attrs = attr_list; return 0; err: kfree(attr_list); return -EINVAL; } /* * Create the sysfs: * ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY * YYY is the gid table index in decimal */ static int setup_gid_attrs(struct ib_port *port, const struct ib_port_attr *attr) { struct gid_attr_group *gid_attr_group; int ret; gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list, size_mul(attr->gid_tbl_len, 2)), GFP_KERNEL); if (!gid_attr_group) return -ENOMEM; gid_attr_group->port = port; kobject_init(&gid_attr_group->kobj, &gid_attr_type); ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0], gid_attr_group->attrs_list, attr->gid_tbl_len, show_port_gid_attr_ndev); if (ret) goto err_put; gid_attr_group->groups_list[0] = &gid_attr_group->groups[0]; ret = alloc_port_table_group( "types", &gid_attr_group->groups[1], gid_attr_group->attrs_list + attr->gid_tbl_len, attr->gid_tbl_len, show_port_gid_attr_gid_type); if (ret) goto err_put; gid_attr_group->groups_list[1] = &gid_attr_group->groups[1]; ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs"); if (ret) goto err_put; ret = sysfs_create_groups(&gid_attr_group->kobj, gid_attr_group->groups_list); if (ret) goto err_del; port->gid_attr_group = gid_attr_group; return 0; err_del: kobject_del(&gid_attr_group->kobj); err_put: kobject_put(&gid_attr_group->kobj); return ret; } static void destroy_gid_attrs(struct ib_port *port) { struct gid_attr_group *gid_attr_group = port->gid_attr_group; if (!gid_attr_group) return; sysfs_remove_groups(&gid_attr_group->kobj, gid_attr_group->groups_list); kobject_del(&gid_attr_group->kobj); kobject_put(&gid_attr_group->kobj); } /* * Create the sysfs: * ibp0s9/ports/XX/{gids,pkeys,counters}/YYY */ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, const struct ib_port_attr *attr) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); bool is_full_dev = &device->coredev == coredev; const struct attribute_group **cur_group; struct ib_port *p; int ret; p = kvzalloc(struct_size(p, attrs_list, size_add(attr->gid_tbl_len, attr->pkey_tbl_len)), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); p->ibdev = device; p->port_num = port_num; kobject_init(&p->kobj, &port_type); if (device->port_data && is_full_dev) device->port_data[port_num].sysfs = p; cur_group = p->groups_list; ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list, attr->gid_tbl_len, show_port_gid); if (ret) goto err_put; *cur_group++ = &p->groups[0]; if (attr->pkey_tbl_len) { ret = alloc_port_table_group("pkeys", &p->groups[1], p->attrs_list + attr->gid_tbl_len, attr->pkey_tbl_len, show_port_pkey); if (ret) goto err_put; *cur_group++ = &p->groups[1]; } /* * If port == 0, it means hw_counters are per device and not per * port, so holder should be device. Therefore skip per port * counter initialization. */ if (port_num && is_full_dev) { ret = setup_hw_port_stats(p, &p->groups[2]); if (ret && ret != -EOPNOTSUPP) goto err_put; if (!ret) *cur_group++ = &p->groups[2]; } if (device->ops.process_mad && is_full_dev) *cur_group++ = get_counter_table(device, port_num); ret = kobject_add(&p->kobj, coredev->ports_kobj, "%d", port_num); if (ret) goto err_put; ret = sysfs_create_groups(&p->kobj, p->groups_list); if (ret) goto err_del; if (is_full_dev) { ret = sysfs_create_groups(&p->kobj, device->ops.port_groups); if (ret) goto err_groups; } list_add_tail(&p->kobj.entry, &coredev->port_list); return p; err_groups: sysfs_remove_groups(&p->kobj, p->groups_list); err_del: kobject_del(&p->kobj); err_put: if (device->port_data && is_full_dev) device->port_data[port_num].sysfs = NULL; kobject_put(&p->kobj); return ERR_PTR(ret); } static void destroy_port(struct ib_core_device *coredev, struct ib_port *port) { bool is_full_dev = &port->ibdev->coredev == coredev; list_del(&port->kobj.entry); if (is_full_dev) sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups); sysfs_remove_groups(&port->kobj, port->groups_list); kobject_del(&port->kobj); if (port->ibdev->port_data && port->ibdev->port_data[port->port_num].sysfs == port) port->ibdev->port_data[port->port_num].sysfs = NULL; kobject_put(&port->kobj); } static const char *node_type_string(int node_type) { switch (node_type) { case RDMA_NODE_IB_CA: return "CA"; case RDMA_NODE_IB_SWITCH: return "switch"; case RDMA_NODE_IB_ROUTER: return "router"; case RDMA_NODE_RNIC: return "RNIC"; case RDMA_NODE_USNIC: return "usNIC"; case RDMA_NODE_USNIC_UDP: return "usNIC UDP"; case RDMA_NODE_UNSPECIFIED: return "unspecified"; } return "<unknown>"; } static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); return sysfs_emit(buf, "%u: %s\n", dev->node_type, node_type_string(dev->node_type)); } static DEVICE_ATTR_RO(node_type); static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); } static DEVICE_ATTR_RO(sys_image_guid); static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); __be16 *node_guid = (__be16 *)&dev->node_guid; return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(node_guid[0]), be16_to_cpu(node_guid[1]), be16_to_cpu(node_guid[2]), be16_to_cpu(node_guid[3])); } static DEVICE_ATTR_RO(node_guid); static ssize_t node_desc_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); return sysfs_emit(buf, "%.64s\n", dev->node_desc); } static ssize_t node_desc_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct ib_device *dev = rdma_device_to_ibdev(device); struct ib_device_modify desc = {}; int ret; if (!dev->ops.modify_device) return -EOPNOTSUPP; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; return count; } static DEVICE_ATTR_RW(node_desc); static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); char version[IB_FW_VERSION_NAME_MAX] = {}; ib_get_device_fw_str(dev, version); return sysfs_emit(buf, "%s\n", version); } static DEVICE_ATTR_RO(fw_ver); static struct attribute *ib_dev_attrs[] = { &dev_attr_node_type.attr, &dev_attr_node_guid.attr, &dev_attr_sys_image_guid.attr, &dev_attr_fw_ver.attr, &dev_attr_node_desc.attr, NULL, }; const struct attribute_group ib_dev_attr_group = { .attrs = ib_dev_attrs, }; void ib_free_port_attrs(struct ib_core_device *coredev) { struct kobject *p, *t; list_for_each_entry_safe(p, t, &coredev->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); destroy_gid_attrs(port); destroy_port(coredev, port); } kobject_put(coredev->ports_kobj); } int ib_setup_port_attrs(struct ib_core_device *coredev) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); u32 port_num; int ret; coredev->ports_kobj = kobject_create_and_add("ports", &coredev->dev.kobj); if (!coredev->ports_kobj) return -ENOMEM; rdma_for_each_port (device, port_num) { struct ib_port_attr attr; struct ib_port *port; ret = ib_query_port(device, port_num, &attr); if (ret) goto err_put; port = setup_port(coredev, port_num, &attr); if (IS_ERR(port)) { ret = PTR_ERR(port); goto err_put; } ret = setup_gid_attrs(port, &attr); if (ret) goto err_put; } return 0; err_put: ib_free_port_attrs(coredev); return ret; } /** * ib_port_register_client_groups - Add an ib_client's attributes to the port * * @ibdev: IB device to add counters * @port_num: valid port number * @groups: Group list of attributes * * Do not use. Only for legacy sysfs compatibility. */ int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num, const struct attribute_group **groups) { return sysfs_create_groups(&ibdev->port_data[port_num].sysfs->kobj, groups); } EXPORT_SYMBOL(ib_port_register_client_groups); void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num, const struct attribute_group **groups) { return sysfs_remove_groups(&ibdev->port_data[port_num].sysfs->kobj, groups); } EXPORT_SYMBOL(ib_port_unregister_client_groups);
143 144 144 144 143 5 1 4 218 4 80 144 156 67 222 170 170 222 222 1 170 170 31 31 223 222 31 31 222 222 31 14 48 48 36 36 19 19 10 10 193 193 83 4 21 12 12 6 6 223 222 1 134 59 30 188 4 1 1 5 130 16 41 171 171 170 171 37 170 25 56 12 110 5 8 6 160 3 26 72 1 72 35 143 143 143 143 79 145 4 145 143 143 94 51 145 7 7 7 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter driver * * Copyright (C) 2009 - 2013 Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2012 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2000 Peter Berger (pberger@brimson.com) * Copyright (C) 2000 Al Borchers (borchers@steinerpoint.com) * * This driver was originally based on the ACM driver by Armin Fuerst (which was * based on a driver by Brad Keryan) * * See Documentation/usb/usb-serial.rst for more information on using this * driver */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/uaccess.h> #include <linux/serial.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/kfifo.h> #include <linux/idr.h> #define DRIVER_AUTHOR "Greg Kroah-Hartman <gregkh@linuxfoundation.org>" #define DRIVER_DESC "USB Serial Driver core" #define USB_SERIAL_TTY_MAJOR 188 #define USB_SERIAL_TTY_MINORS 512 /* should be enough for a while */ /* There is no MODULE_DEVICE_TABLE for usbserial.c. Instead the MODULE_DEVICE_TABLE declarations in each serial driver cause the "hotplug" program to pull in whatever module is necessary via modprobe, and modprobe will load usbserial because the serial drivers depend on it. */ static DEFINE_IDR(serial_minors); static DEFINE_MUTEX(table_lock); static LIST_HEAD(usb_serial_driver_list); /* * Look up the serial port structure. If it is found and it hasn't been * disconnected, return with the parent usb_serial structure's disc_mutex held * and its refcount incremented. Otherwise return NULL. */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned minor) { struct usb_serial *serial; struct usb_serial_port *port; mutex_lock(&table_lock); port = idr_find(&serial_minors, minor); if (!port) goto exit; serial = port->serial; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { mutex_unlock(&serial->disc_mutex); port = NULL; } else { kref_get(&serial->kref); } exit: mutex_unlock(&table_lock); return port; } static int allocate_minors(struct usb_serial *serial, int num_ports) { struct usb_serial_port *port; unsigned int i, j; int minor; dev_dbg(&serial->interface->dev, "%s %d\n", __func__, num_ports); mutex_lock(&table_lock); for (i = 0; i < num_ports; ++i) { port = serial->port[i]; minor = idr_alloc(&serial_minors, port, 0, USB_SERIAL_TTY_MINORS, GFP_KERNEL); if (minor < 0) goto error; port->minor = minor; port->port_number = i; } serial->minors_reserved = 1; mutex_unlock(&table_lock); return 0; error: /* unwind the already allocated minors */ for (j = 0; j < i; ++j) idr_remove(&serial_minors, serial->port[j]->minor); mutex_unlock(&table_lock); return minor; } static void release_minors(struct usb_serial *serial) { int i; mutex_lock(&table_lock); for (i = 0; i < serial->num_ports; ++i) idr_remove(&serial_minors, serial->port[i]->minor); mutex_unlock(&table_lock); serial->minors_reserved = 0; } int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; int ret; if (serial->sibling) return -EBUSY; ret = usb_driver_claim_interface(driver, intf, serial); if (ret) { dev_err(&serial->interface->dev, "failed to claim sibling interface: %d\n", ret); return ret; } serial->sibling = intf; return 0; } EXPORT_SYMBOL_GPL(usb_serial_claim_interface); static void release_sibling(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; struct usb_interface *sibling; if (!serial->sibling) return; if (intf == serial->sibling) sibling = serial->interface; else sibling = serial->sibling; usb_set_intfdata(sibling, NULL); usb_driver_release_interface(driver, sibling); } static void destroy_serial(struct kref *kref) { struct usb_serial *serial; struct usb_serial_port *port; int i; serial = to_usb_serial(kref); /* return the minor range that this device had */ if (serial->minors_reserved) release_minors(serial); if (serial->attached && serial->type->release) serial->type->release(serial); /* Now that nothing is using the ports, they can be freed */ for (i = 0; i < serial->num_port_pointers; ++i) { port = serial->port[i]; if (port) { port->serial = NULL; put_device(&port->dev); } } usb_put_intf(serial->interface); usb_put_dev(serial->dev); kfree(serial); } void usb_serial_put(struct usb_serial *serial) { kref_put(&serial->kref, destroy_serial); } /***************************************************************************** * Driver tty interface functions *****************************************************************************/ /** * serial_install - install tty * @driver: the driver (USB in our case) * @tty: the tty being created * * Initialise the termios structure for this tty. We use the default * USB serial settings but permit them to be overridden by * serial->type->init_termios on first open. * * This is the first place a new tty gets used. Hence this is where we * acquire references to the usb_serial structure and the driver module, * where we store a pointer to the port. All these actions are reversed * in serial_cleanup(). */ static int serial_install(struct tty_driver *driver, struct tty_struct *tty) { int idx = tty->index; struct usb_serial *serial; struct usb_serial_port *port; bool init_termios; int retval = -ENODEV; port = usb_serial_port_get_by_minor(idx); if (!port) return retval; serial = port->serial; if (!try_module_get(serial->type->driver.owner)) goto err_put_serial; init_termios = (driver->termios[idx] == NULL); retval = tty_standard_install(driver, tty); if (retval) goto err_put_module; mutex_unlock(&serial->disc_mutex); /* allow the driver to update the initial settings */ if (init_termios && serial->type->init_termios) serial->type->init_termios(tty); tty->driver_data = port; return retval; err_put_module: module_put(serial->type->driver.owner); err_put_serial: usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); return retval; } static int serial_port_activate(struct tty_port *tport, struct tty_struct *tty) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial *serial = port->serial; int retval; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { retval = -ENODEV; goto out_unlock; } retval = usb_autopm_get_interface(serial->interface); if (retval) goto out_unlock; retval = port->serial->type->open(tty, port); if (retval) usb_autopm_put_interface(serial->interface); out_unlock: mutex_unlock(&serial->disc_mutex); if (retval < 0) retval = usb_translate_errors(retval); return retval; } static int serial_open(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return tty_port_open(&port->port, tty, filp); } /** * serial_port_shutdown - shut down hardware * @tport: tty port to shut down * * Shut down a USB serial port. Serialized against activate by the * tport mutex and kept to matching open/close pairs * of calls by the tty-port initialized flag. * * Not called if tty is console. */ static void serial_port_shutdown(struct tty_port *tport) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial_driver *drv = port->serial->type; if (drv->close) drv->close(port); usb_autopm_put_interface(port->serial->interface); } static void serial_hangup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_hangup(&port->port); } static void serial_close(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_close(&port->port, tty, filp); } /** * serial_cleanup - free resources post close/hangup * @tty: tty to clean up * * Do the resource freeing and refcount dropping for the port. * Avoid freeing the console. * * Called asynchronously after the last tty kref is dropped. */ static void serial_cleanup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial; struct module *owner; dev_dbg(&port->dev, "%s\n", __func__); /* The console is magical. Do not hang up the console hardware * or there will be tears. */ if (port->port.console) return; tty->driver_data = NULL; serial = port->serial; owner = serial->type->driver.owner; usb_serial_put(serial); module_put(owner); } static ssize_t serial_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct usb_serial_port *port = tty->driver_data; int retval = -ENODEV; if (port->serial->dev->state == USB_STATE_NOTATTACHED) goto exit; dev_dbg(&port->dev, "%s - %zu byte(s)\n", __func__, count); retval = port->serial->type->write(tty, port, buf, count); if (retval < 0) retval = usb_translate_errors(retval); exit: return retval; } static unsigned int serial_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return port->serial->type->write_room(tty); } static unsigned int serial_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (serial->disconnected) return 0; return serial->type->chars_in_buffer(tty); } static void serial_wait_until_sent(struct tty_struct *tty, int timeout) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (!port->serial->type->wait_until_sent) return; mutex_lock(&serial->disc_mutex); if (!serial->disconnected) port->serial->type->wait_until_sent(tty, timeout); mutex_unlock(&serial->disc_mutex); } static void serial_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->throttle) port->serial->type->throttle(tty); } static void serial_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->unthrottle) port->serial->type->unthrottle(tty); } static int serial_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; mutex_lock(&tport->mutex); close_delay = jiffies_to_msecs(tport->close_delay) / 10; closing_wait = tport->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = jiffies_to_msecs(closing_wait) / 10; ss->line = port->minor; ss->close_delay = close_delay; ss->closing_wait = closing_wait; if (port->serial->type->get_serial) port->serial->type->get_serial(tty, ss); mutex_unlock(&tport->mutex); return 0; } static int serial_set_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; int ret = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = msecs_to_jiffies(closing_wait * 10); mutex_lock(&tport->mutex); if (!capable(CAP_SYS_ADMIN)) { if (close_delay != tport->close_delay || closing_wait != tport->closing_wait) { ret = -EPERM; goto out_unlock; } } if (port->serial->type->set_serial) { ret = port->serial->type->set_serial(tty, ss); if (ret) goto out_unlock; } tport->close_delay = close_delay; tport->closing_wait = closing_wait; out_unlock: mutex_unlock(&tport->mutex); return ret; } static int serial_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; int retval = -ENOIOCTLCMD; dev_dbg(&port->dev, "%s - cmd 0x%04x\n", __func__, cmd); switch (cmd) { case TIOCMIWAIT: if (port->serial->type->tiocmiwait) retval = port->serial->type->tiocmiwait(tty, arg); break; default: if (port->serial->type->ioctl) retval = port->serial->type->ioctl(tty, cmd, arg); } return retval; } static void serial_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->set_termios) port->serial->type->set_termios(tty, port, old); else tty_termios_copy_hw(&tty->termios, old); } static int serial_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->break_ctl) return port->serial->type->break_ctl(tty, break_state); return -ENOTTY; } static int serial_proc_show(struct seq_file *m, void *v) { struct usb_serial *serial; struct usb_serial_port *port; int i; char tmp[40]; seq_puts(m, "usbserinfo:1.0 driver:2.0\n"); for (i = 0; i < USB_SERIAL_TTY_MINORS; ++i) { port = usb_serial_port_get_by_minor(i); if (port == NULL) continue; serial = port->serial; seq_printf(m, "%d:", i); if (serial->type->driver.owner) seq_printf(m, " module:%s", module_name(serial->type->driver.owner)); seq_printf(m, " name:\"%s\"", serial->type->description); seq_printf(m, " vendor:%04x product:%04x", le16_to_cpu(serial->dev->descriptor.idVendor), le16_to_cpu(serial->dev->descriptor.idProduct)); seq_printf(m, " num_ports:%d", serial->num_ports); seq_printf(m, " port:%d", port->port_number); usb_make_path(serial->dev, tmp, sizeof(tmp)); seq_printf(m, " path:%s", tmp); seq_putc(m, '\n'); usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); } return 0; } static int serial_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmget) return port->serial->type->tiocmget(tty); return -ENOTTY; } static int serial_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmset) return port->serial->type->tiocmset(tty, set, clear); return -ENOTTY; } static int serial_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->get_icount) return port->serial->type->get_icount(tty, icount); return -ENOTTY; } /* * We would be calling tty_wakeup here, but unfortunately some line * disciplines have an annoying habit of calling tty->write from * the write wakeup callback (e.g. n_hdlc.c). */ void usb_serial_port_softint(struct usb_serial_port *port) { schedule_work(&port->work); } EXPORT_SYMBOL_GPL(usb_serial_port_softint); static void usb_serial_port_work(struct work_struct *work) { struct usb_serial_port *port = container_of(work, struct usb_serial_port, work); tty_port_tty_wakeup(&port->port); } static void usb_serial_port_poison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_poison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_poison_urb(port->write_urbs[i]); usb_poison_urb(port->interrupt_in_urb); usb_poison_urb(port->interrupt_out_urb); } static void usb_serial_port_unpoison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_unpoison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_unpoison_urb(port->write_urbs[i]); usb_unpoison_urb(port->interrupt_in_urb); usb_unpoison_urb(port->interrupt_out_urb); } static void usb_serial_port_release(struct device *dev) { struct usb_serial_port *port = to_usb_serial_port(dev); int i; dev_dbg(dev, "%s\n", __func__); usb_free_urb(port->interrupt_in_urb); usb_free_urb(port->interrupt_out_urb); for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { usb_free_urb(port->read_urbs[i]); kfree(port->bulk_in_buffers[i]); } for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { usb_free_urb(port->write_urbs[i]); kfree(port->bulk_out_buffers[i]); } kfifo_free(&port->write_fifo); kfree(port->interrupt_in_buffer); kfree(port->interrupt_out_buffer); tty_port_destroy(&port->port); kfree(port); } static struct usb_serial *create_serial(struct usb_device *dev, struct usb_interface *interface, struct usb_serial_driver *driver) { struct usb_serial *serial; serial = kzalloc(sizeof(*serial), GFP_KERNEL); if (!serial) return NULL; serial->dev = usb_get_dev(dev); serial->type = driver; serial->interface = usb_get_intf(interface); kref_init(&serial->kref); mutex_init(&serial->disc_mutex); serial->minors_reserved = 0; return serial; } static const struct usb_device_id *match_dynamic_id(struct usb_interface *intf, struct usb_serial_driver *drv) { struct usb_dynid *dynid; guard(mutex)(&usb_dynids_lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (usb_match_one_id(intf, &dynid->id)) { return &dynid->id; } } return NULL; } static const struct usb_device_id *get_iface_id(struct usb_serial_driver *drv, struct usb_interface *intf) { const struct usb_device_id *id; id = usb_match_id(intf, drv->id_table); if (id) { dev_dbg(&intf->dev, "static descriptor matches\n"); goto exit; } id = match_dynamic_id(intf, drv); if (id) dev_dbg(&intf->dev, "dynamic descriptor matches\n"); exit: return id; } /* Caller must hold table_lock */ static struct usb_serial_driver *search_serial_device( struct usb_interface *iface) { const struct usb_device_id *id = NULL; struct usb_serial_driver *drv; struct usb_driver *driver = to_usb_driver(iface->dev.driver); /* Check if the usb id matches a known device */ list_for_each_entry(drv, &usb_serial_driver_list, driver_list) { if (drv->usb_driver == driver) id = get_iface_id(drv, iface); if (id) return drv; } return NULL; } static bool serial_port_carrier_raised(struct tty_port *port) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->carrier_raised) return drv->carrier_raised(p); /* No carrier control - don't block */ return true; } static void serial_port_dtr_rts(struct tty_port *port, bool on) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->dtr_rts) drv->dtr_rts(p, on); } static ssize_t port_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); return sprintf(buf, "%u\n", port->port_number); } static DEVICE_ATTR_RO(port_number); static struct attribute *usb_serial_port_attrs[] = { &dev_attr_port_number.attr, NULL }; ATTRIBUTE_GROUPS(usb_serial_port); static const struct tty_port_operations serial_port_ops = { .carrier_raised = serial_port_carrier_raised, .dtr_rts = serial_port_dtr_rts, .activate = serial_port_activate, .shutdown = serial_port_shutdown, }; static void store_endpoint(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_endpoint_descriptor *epd) { struct device *dev = &serial->interface->dev; u8 addr = epd->bEndpointAddress; if (usb_endpoint_is_bulk_in(epd)) { if (epds->num_bulk_in == ARRAY_SIZE(epds->bulk_in)) return; dev_dbg(dev, "found bulk in endpoint %02x\n", addr); epds->bulk_in[epds->num_bulk_in++] = epd; } else if (usb_endpoint_is_bulk_out(epd)) { if (epds->num_bulk_out == ARRAY_SIZE(epds->bulk_out)) return; dev_dbg(dev, "found bulk out endpoint %02x\n", addr); epds->bulk_out[epds->num_bulk_out++] = epd; } else if (usb_endpoint_is_int_in(epd)) { if (epds->num_interrupt_in == ARRAY_SIZE(epds->interrupt_in)) return; dev_dbg(dev, "found interrupt in endpoint %02x\n", addr); epds->interrupt_in[epds->num_interrupt_in++] = epd; } else if (usb_endpoint_is_int_out(epd)) { if (epds->num_interrupt_out == ARRAY_SIZE(epds->interrupt_out)) return; dev_dbg(dev, "found interrupt out endpoint %02x\n", addr); epds->interrupt_out[epds->num_interrupt_out++] = epd; } } static void find_endpoints(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_interface *intf) { struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *epd; unsigned int i; iface_desc = intf->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { epd = &iface_desc->endpoint[i].desc; store_endpoint(serial, epds, epd); } } static int setup_port_bulk_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; buffer_size = max_t(int, type->bulk_in_size, usb_endpoint_maxp(epd)); port->bulk_in_size = buffer_size; port->bulk_in_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { set_bit(i, &port->read_urbs_free); port->read_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->read_urbs[i]) return -ENOMEM; port->bulk_in_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_in_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->read_urbs[i], udev, usb_rcvbulkpipe(udev, epd->bEndpointAddress), port->bulk_in_buffers[i], buffer_size, type->read_bulk_callback, port); } port->read_urb = port->read_urbs[0]; port->bulk_in_buffer = port->bulk_in_buffers[0]; return 0; } static int setup_port_bulk_out(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) return -ENOMEM; if (type->bulk_out_size) buffer_size = type->bulk_out_size; else buffer_size = usb_endpoint_maxp(epd); port->bulk_out_size = buffer_size; port->bulk_out_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { set_bit(i, &port->write_urbs_free); port->write_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->write_urbs[i]) return -ENOMEM; port->bulk_out_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_out_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->write_urbs[i], udev, usb_sndbulkpipe(udev, epd->bEndpointAddress), port->bulk_out_buffers[i], buffer_size, type->write_bulk_callback, port); } port->write_urb = port->write_urbs[0]; port->bulk_out_buffer = port->bulk_out_buffers[0]; return 0; } static int setup_port_interrupt_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_in_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_in_endpointAddress = epd->bEndpointAddress; port->interrupt_in_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_in_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_in_urb, udev, usb_rcvintpipe(udev, epd->bEndpointAddress), port->interrupt_in_buffer, buffer_size, type->read_int_callback, port, epd->bInterval); return 0; } static int setup_port_interrupt_out(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_out_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_out_size = buffer_size; port->interrupt_out_endpointAddress = epd->bEndpointAddress; port->interrupt_out_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_out_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_out_urb, udev, usb_sndintpipe(udev, epd->bEndpointAddress), port->interrupt_out_buffer, buffer_size, type->write_int_callback, port, epd->bInterval); return 0; } static int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *ddev = &interface->dev; struct usb_device *dev = interface_to_usbdev(interface); struct usb_serial *serial = NULL; struct usb_serial_port *port; struct usb_serial_endpoints *epds; struct usb_serial_driver *type = NULL; int retval; int i; int num_ports = 0; unsigned char max_endpoints; mutex_lock(&table_lock); type = search_serial_device(interface); if (!type) { mutex_unlock(&table_lock); dev_dbg(ddev, "none matched\n"); return -ENODEV; } if (!try_module_get(type->driver.owner)) { mutex_unlock(&table_lock); dev_err(ddev, "module get failed, exiting\n"); return -EIO; } mutex_unlock(&table_lock); serial = create_serial(dev, interface, type); if (!serial) { retval = -ENOMEM; goto err_put_module; } /* if this device type has a probe function, call it */ if (type->probe) { const struct usb_device_id *id; id = get_iface_id(type, interface); retval = type->probe(serial, id); if (retval) { dev_dbg(ddev, "sub driver rejected device\n"); goto err_release_sibling; } } /* descriptor matches, let's find the endpoints needed */ epds = kzalloc(sizeof(*epds), GFP_KERNEL); if (!epds) { retval = -ENOMEM; goto err_release_sibling; } find_endpoints(serial, epds, interface); if (serial->sibling) find_endpoints(serial, epds, serial->sibling); if (epds->num_bulk_in < type->num_bulk_in || epds->num_bulk_out < type->num_bulk_out || epds->num_interrupt_in < type->num_interrupt_in || epds->num_interrupt_out < type->num_interrupt_out) { dev_err(ddev, "required endpoints missing\n"); retval = -ENODEV; goto err_free_epds; } if (type->calc_num_ports) { retval = type->calc_num_ports(serial, epds); if (retval < 0) goto err_free_epds; num_ports = retval; } if (!num_ports) num_ports = type->num_ports; if (num_ports > MAX_NUM_PORTS) { dev_warn(ddev, "too many ports requested: %d\n", num_ports); num_ports = MAX_NUM_PORTS; } serial->num_ports = (unsigned char)num_ports; serial->num_bulk_in = epds->num_bulk_in; serial->num_bulk_out = epds->num_bulk_out; serial->num_interrupt_in = epds->num_interrupt_in; serial->num_interrupt_out = epds->num_interrupt_out; /* found all that we need */ dev_info(ddev, "%s converter detected\n", type->description); /* create our ports, we need as many as the max endpoints */ /* we don't use num_ports here because some devices have more endpoint pairs than ports */ max_endpoints = max(epds->num_bulk_in, epds->num_bulk_out); max_endpoints = max(max_endpoints, epds->num_interrupt_in); max_endpoints = max(max_endpoints, epds->num_interrupt_out); max_endpoints = max(max_endpoints, serial->num_ports); serial->num_port_pointers = max_endpoints; dev_dbg(ddev, "setting up %d port structure(s)\n", max_endpoints); for (i = 0; i < max_endpoints; ++i) { port = kzalloc(sizeof(struct usb_serial_port), GFP_KERNEL); if (!port) { retval = -ENOMEM; goto err_free_epds; } tty_port_init(&port->port); port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); /* Keep this for private driver use for the moment but should probably go away */ INIT_WORK(&port->work, usb_serial_port_work); serial->port[i] = port; port->dev.parent = &interface->dev; port->dev.driver = NULL; port->dev.bus = &usb_serial_bus_type; port->dev.release = &usb_serial_port_release; port->dev.groups = usb_serial_port_groups; device_initialize(&port->dev); } /* set up the endpoint information */ for (i = 0; i < epds->num_bulk_in; ++i) { retval = setup_port_bulk_in(serial->port[i], epds->bulk_in[i]); if (retval) goto err_free_epds; } for (i = 0; i < epds->num_bulk_out; ++i) { retval = setup_port_bulk_out(serial->port[i], epds->bulk_out[i]); if (retval) goto err_free_epds; } if (serial->type->read_int_callback) { for (i = 0; i < epds->num_interrupt_in; ++i) { retval = setup_port_interrupt_in(serial->port[i], epds->interrupt_in[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_in) { dev_dbg(ddev, "The device claims to support interrupt in transfers, but read_int_callback is not defined\n"); } if (serial->type->write_int_callback) { for (i = 0; i < epds->num_interrupt_out; ++i) { retval = setup_port_interrupt_out(serial->port[i], epds->interrupt_out[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_out) { dev_dbg(ddev, "The device claims to support interrupt out transfers, but write_int_callback is not defined\n"); } usb_set_intfdata(interface, serial); /* if this device type has an attach function, call it */ if (type->attach) { retval = type->attach(serial); if (retval < 0) goto err_free_epds; serial->attached = 1; if (retval > 0) { /* quietly accept this device, but don't bind to a serial port as it's about to disappear */ serial->num_ports = 0; goto exit; } } else { serial->attached = 1; } retval = allocate_minors(serial, num_ports); if (retval) { dev_err(ddev, "No more free serial minor numbers\n"); goto err_free_epds; } /* register all of the individual ports with the driver core */ for (i = 0; i < num_ports; ++i) { port = serial->port[i]; dev_set_name(&port->dev, "ttyUSB%d", port->minor); dev_dbg(ddev, "registering %s\n", dev_name(&port->dev)); device_enable_async_suspend(&port->dev); retval = device_add(&port->dev); if (retval) dev_err(ddev, "Error registering port device, continuing\n"); } if (num_ports > 0) usb_serial_console_init(serial->port[0]->minor); exit: kfree(epds); module_put(type->driver.owner); return 0; err_free_epds: kfree(epds); err_release_sibling: release_sibling(serial, interface); usb_serial_put(serial); err_put_module: module_put(type->driver.owner); return retval; } static void usb_serial_disconnect(struct usb_interface *interface) { int i; struct usb_serial *serial = usb_get_intfdata(interface); struct device *dev = &interface->dev; struct usb_serial_port *port; /* sibling interface is cleaning up */ if (!serial) return; usb_serial_console_disconnect(serial); mutex_lock(&serial->disc_mutex); /* must set a flag, to signal subdrivers */ serial->disconnected = 1; mutex_unlock(&serial->disc_mutex); for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; tty_port_tty_vhangup(&port->port); usb_serial_port_poison_urbs(port); wake_up_interruptible(&port->port.delta_msr_wait); cancel_work_sync(&port->work); if (device_is_registered(&port->dev)) device_del(&port->dev); } if (serial->type->disconnect) serial->type->disconnect(serial); release_sibling(serial, interface); /* let the last holder of this object cause it to be cleaned up */ usb_serial_put(serial); dev_info(dev, "device disconnected\n"); } int usb_serial_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_serial *serial = usb_get_intfdata(intf); int i, r; /* suspend when called for first sibling interface */ if (serial->suspend_count++) return 0; /* * serial->type->suspend() MUST return 0 in system sleep context, * otherwise, the resume callback has to recover device from * previous suspend failure. */ if (serial->type->suspend) { r = serial->type->suspend(serial, message); if (r < 0) { serial->suspend_count--; return r; } } for (i = 0; i < serial->num_ports; ++i) usb_serial_port_poison_urbs(serial->port[i]); return 0; } EXPORT_SYMBOL(usb_serial_suspend); static void usb_serial_unpoison_port_urbs(struct usb_serial *serial) { int i; for (i = 0; i < serial->num_ports; ++i) usb_serial_port_unpoison_urbs(serial->port[i]); } int usb_serial_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->resume) rv = serial->type->resume(serial); else rv = usb_serial_generic_resume(serial); return rv; } EXPORT_SYMBOL(usb_serial_resume); static int usb_serial_reset_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->reset_resume) { rv = serial->type->reset_resume(serial); } else { rv = -EOPNOTSUPP; intf->needs_binding = 1; } return rv; } static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, .throttle = serial_throttle, .unthrottle = serial_unthrottle, .break_ctl = serial_break, .chars_in_buffer = serial_chars_in_buffer, .wait_until_sent = serial_wait_until_sent, .tiocmget = serial_tiocmget, .tiocmset = serial_tiocmset, .get_icount = serial_get_icount, .set_serial = serial_set_serial, .get_serial = serial_get_serial, .cleanup = serial_cleanup, .install = serial_install, .proc_show = serial_proc_show, }; struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) { int result; usb_serial_tty_driver = tty_alloc_driver(USB_SERIAL_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(usb_serial_tty_driver)) return PTR_ERR(usb_serial_tty_driver); /* Initialize our global data */ result = bus_register(&usb_serial_bus_type); if (result) { pr_err("%s - registering bus driver failed\n", __func__); goto err_put_driver; } usb_serial_tty_driver->driver_name = "usbserial"; usb_serial_tty_driver->name = "ttyUSB"; usb_serial_tty_driver->major = USB_SERIAL_TTY_MAJOR; usb_serial_tty_driver->minor_start = 0; usb_serial_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; usb_serial_tty_driver->subtype = SERIAL_TYPE_NORMAL; usb_serial_tty_driver->init_termios = tty_std_termios; usb_serial_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; usb_serial_tty_driver->init_termios.c_ispeed = 9600; usb_serial_tty_driver->init_termios.c_ospeed = 9600; tty_set_operations(usb_serial_tty_driver, &serial_ops); result = tty_register_driver(usb_serial_tty_driver); if (result) { pr_err("%s - tty_register_driver failed\n", __func__); goto err_unregister_bus; } /* register the generic driver, if we should */ result = usb_serial_generic_register(); if (result < 0) { pr_err("%s - registering generic driver failed\n", __func__); goto err_unregister_driver; } return result; err_unregister_driver: tty_unregister_driver(usb_serial_tty_driver); err_unregister_bus: bus_unregister(&usb_serial_bus_type); err_put_driver: pr_err("%s - returning with error %d\n", __func__, result); tty_driver_kref_put(usb_serial_tty_driver); return result; } static void __exit usb_serial_exit(void) { usb_serial_console_exit(); usb_serial_generic_deregister(); tty_unregister_driver(usb_serial_tty_driver); tty_driver_kref_put(usb_serial_tty_driver); bus_unregister(&usb_serial_bus_type); idr_destroy(&serial_minors); } module_init(usb_serial_init); module_exit(usb_serial_exit); #define set_to_generic_if_null(type, function) \ do { \ if (!type->function) { \ type->function = usb_serial_generic_##function; \ pr_debug("%s: using generic " #function "\n", \ type->driver.name); \ } \ } while (0) static void usb_serial_operations_init(struct usb_serial_driver *device) { set_to_generic_if_null(device, open); set_to_generic_if_null(device, write); set_to_generic_if_null(device, close); set_to_generic_if_null(device, write_room); set_to_generic_if_null(device, chars_in_buffer); if (device->tx_empty) set_to_generic_if_null(device, wait_until_sent); set_to_generic_if_null(device, read_bulk_callback); set_to_generic_if_null(device, write_bulk_callback); set_to_generic_if_null(device, process_read_urb); set_to_generic_if_null(device, prepare_write_buffer); } static int usb_serial_register(struct usb_serial_driver *driver) { int retval; if (usb_disabled()) return -ENODEV; if (!driver->description) driver->description = driver->driver.name; if (!driver->usb_driver) { WARN(1, "Serial driver %s has no usb_driver\n", driver->description); return -EINVAL; } /* Prevent individual ports from being unbound. */ driver->driver.suppress_bind_attrs = true; usb_serial_operations_init(driver); /* Add this device to our list of devices */ mutex_lock(&table_lock); list_add(&driver->driver_list, &usb_serial_driver_list); retval = usb_serial_bus_register(driver); if (retval) { pr_err("problem %d when registering driver %s\n", retval, driver->description); list_del(&driver->driver_list); } else { pr_info("USB Serial support registered for %s\n", driver->description); } mutex_unlock(&table_lock); return retval; } static void usb_serial_deregister(struct usb_serial_driver *device) { pr_info("USB Serial deregistering driver %s\n", device->description); mutex_lock(&table_lock); list_del(&device->driver_list); mutex_unlock(&table_lock); usb_serial_bus_deregister(device); } /** * __usb_serial_register_drivers - register drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be registered * @owner: owning module * @name: name of the usb_driver for this set of @serial_drivers * @id_table: list of all devices this @serial_drivers set binds to * * Registers all the drivers in the @serial_drivers array, and dynamically * creates a struct usb_driver with the name @name and id_table of @id_table. */ int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], struct module *owner, const char *name, const struct usb_device_id *id_table) { int rc; struct usb_driver *udriver; struct usb_serial_driver * const *sd; /* * udriver must be registered before any of the serial drivers, * because the store_new_id() routine for the serial drivers (in * bus.c) probes udriver. * * Performance hack: We don't want udriver to be probed until * the serial drivers are registered, because the probe would * simply fail for lack of a matching serial driver. * So we leave udriver's id_table set to NULL until we are all set. * * Suspend/resume support is implemented in the usb-serial core, * so fill in the PM-related fields in udriver. */ udriver = kzalloc(sizeof(*udriver), GFP_KERNEL); if (!udriver) return -ENOMEM; udriver->name = name; udriver->no_dynamic_id = 1; udriver->supports_autosuspend = 1; udriver->suspend = usb_serial_suspend; udriver->resume = usb_serial_resume; udriver->probe = usb_serial_probe; udriver->disconnect = usb_serial_disconnect; /* we only set the reset_resume field if the serial_driver has one */ for (sd = serial_drivers; *sd; ++sd) { if ((*sd)->reset_resume) { udriver->reset_resume = usb_serial_reset_resume; break; } } rc = usb_register(udriver); if (rc) goto err_free_driver; for (sd = serial_drivers; *sd; ++sd) { (*sd)->usb_driver = udriver; (*sd)->driver.owner = owner; rc = usb_serial_register(*sd); if (rc) goto err_deregister_drivers; } /* Now set udriver's id_table and look for matches */ udriver->id_table = id_table; rc = driver_attach(&udriver->driver); return 0; err_deregister_drivers: while (sd-- > serial_drivers) usb_serial_deregister(*sd); usb_deregister(udriver); err_free_driver: kfree(udriver); return rc; } EXPORT_SYMBOL_GPL(__usb_serial_register_drivers); /** * usb_serial_deregister_drivers - deregister drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be deregistered * * Deregisters all the drivers in the @serial_drivers array and deregisters and * frees the struct usb_driver that was created by the call to * usb_serial_register_drivers(). */ void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]) { struct usb_driver *udriver = (*serial_drivers)->usb_driver; for (; *serial_drivers; ++serial_drivers) usb_serial_deregister(*serial_drivers); usb_deregister(udriver); kfree(udriver); } EXPORT_SYMBOL_GPL(usb_serial_deregister_drivers); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
1 2 3 3 3 4 4 1 3 3 3 1 1 3 1 2 6 6 4 3 3 2 2 3 4 3 3 1 1 19 3 37 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 // SPDX-License-Identifier: GPL-2.0-only #include <linux/fs.h> #include <linux/xattr.h> #include "overlayfs.h" static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name) { struct ovl_fs *ofs = sb->s_fs_info; if (ofs->config.userxattr) return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX, OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0; else return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX, OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0; } static bool ovl_is_own_xattr(struct super_block *sb, const char *name) { struct ovl_fs *ofs = OVL_FS(sb); if (ofs->config.userxattr) return strncmp(name, OVL_XATTR_USER_PREFIX, OVL_XATTR_USER_PREFIX_LEN) == 0; else return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, OVL_XATTR_TRUSTED_PREFIX_LEN) == 0; } bool ovl_is_private_xattr(struct super_block *sb, const char *name) { return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name); } static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { int err; struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdentry = ovl_i_dentry_upper(inode); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); struct path realpath; const struct cred *old_cred; if (!value && !upperdentry) { ovl_path_lower(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); ovl_revert_creds(old_cred); if (err < 0) goto out; } if (!upperdentry) { err = ovl_copy_up(dentry); if (err) goto out; realdentry = ovl_dentry_upper(dentry); } err = ovl_want_write(dentry); if (err) goto out; old_cred = ovl_override_creds(dentry->d_sb); if (value) { err = ovl_do_setxattr(ofs, realdentry, name, value, size, flags); } else { WARN_ON(flags != XATTR_REPLACE); err = ovl_do_removexattr(ofs, realdentry, name); } ovl_revert_creds(old_cred); ovl_drop_write(dentry); /* copy c/mtime */ ovl_copyattr(inode); out: return err; } static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { ssize_t res; const struct cred *old_cred; struct path realpath; ovl_i_path_real(inode, &realpath); old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); ovl_revert_creds(old_cred); return res; } static bool ovl_can_list(struct super_block *sb, const char *s) { /* Never list private (.overlay) */ if (ovl_is_private_xattr(sb, s)) return false; /* List all non-trusted xattrs */ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) return true; /* list other trusted for superuser only */ return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); struct ovl_fs *ofs = OVL_FS(dentry->d_sb); ssize_t res; size_t len; char *s; const struct cred *old_cred; size_t prefix_len, name_len; old_cred = ovl_override_creds(dentry->d_sb); res = vfs_listxattr(realdentry, list, size); ovl_revert_creds(old_cred); if (res <= 0 || size == 0) return res; prefix_len = ofs->config.userxattr ? OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN; /* filter out private xattrs */ for (s = list, len = res; len;) { size_t slen = strnlen(s, len) + 1; /* underlying fs providing us with an broken xattr list? */ if (WARN_ON(slen > len)) return -EIO; len -= slen; if (!ovl_can_list(dentry->d_sb, s)) { res -= slen; memmove(s, s + slen, len); } else if (ovl_is_escaped_xattr(dentry->d_sb, s)) { res -= OVL_XATTR_ESCAPE_PREFIX_LEN; name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN; s += prefix_len; memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len); s += name_len; } else { s += slen; } } return res; } static char *ovl_xattr_escape_name(const char *prefix, const char *name) { size_t prefix_len = strlen(prefix); size_t name_len = strlen(name); size_t escaped_len; char *escaped, *s; escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len; if (escaped_len > XATTR_NAME_MAX) return ERR_PTR(-EOPNOTSUPP); escaped = kmalloc(escaped_len + 1, GFP_KERNEL); if (escaped == NULL) return ERR_PTR(-ENOMEM); s = escaped; memcpy(s, prefix, prefix_len); s += prefix_len; memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN); s += OVL_XATTR_ESCAPE_PREFIX_LEN; memcpy(s, name, name_len + 1); return escaped; } static int ovl_own_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { char *escaped; int r; escaped = ovl_xattr_escape_name(handler->prefix, name); if (IS_ERR(escaped)) return PTR_ERR(escaped); r = ovl_xattr_get(dentry, inode, escaped, buffer, size); kfree(escaped); return r; } static int ovl_own_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { char *escaped; int r; escaped = ovl_xattr_escape_name(handler->prefix, name); if (IS_ERR(escaped)) return PTR_ERR(escaped); r = ovl_xattr_set(dentry, inode, escaped, value, size, flags); kfree(escaped); return r; } static int ovl_other_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { return ovl_xattr_get(dentry, inode, name, buffer, size); } static int ovl_other_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { return ovl_xattr_set(dentry, inode, name, value, size, flags); } static const struct xattr_handler ovl_own_trusted_xattr_handler = { .prefix = OVL_XATTR_TRUSTED_PREFIX, .get = ovl_own_xattr_get, .set = ovl_own_xattr_set, }; static const struct xattr_handler ovl_own_user_xattr_handler = { .prefix = OVL_XATTR_USER_PREFIX, .get = ovl_own_xattr_get, .set = ovl_own_xattr_set, }; static const struct xattr_handler ovl_other_xattr_handler = { .prefix = "", /* catch all */ .get = ovl_other_xattr_get, .set = ovl_other_xattr_set, }; static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = { &ovl_own_trusted_xattr_handler, &ovl_other_xattr_handler, NULL }; static const struct xattr_handler * const ovl_user_xattr_handlers[] = { &ovl_own_user_xattr_handler, &ovl_other_xattr_handler, NULL }; const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs) { return ofs->config.userxattr ? ovl_user_xattr_handlers : ovl_trusted_xattr_handlers; }
1 2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 // SPDX-License-Identifier: GPL-2.0-only /* * HID driver for ELO usb touchscreen 4000/4500 * * Copyright (c) 2013 Jiri Slaby * * Data parsing taken from elousb driver by Vojtech Pavlik. */ #include <linux/hid.h> #include <linux/input.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/workqueue.h> #include "hid-ids.h" #define ELO_PERIODIC_READ_INTERVAL HZ #define ELO_SMARTSET_CMD_TIMEOUT 2000 /* msec */ /* Elo SmartSet commands */ #define ELO_FLUSH_SMARTSET_RESPONSES 0x02 /* Flush all pending smartset responses */ #define ELO_SEND_SMARTSET_COMMAND 0x05 /* Send a smartset command */ #define ELO_GET_SMARTSET_RESPONSE 0x06 /* Get a smartset response */ #define ELO_DIAG 0x64 /* Diagnostics command */ #define ELO_SMARTSET_PACKET_SIZE 8 struct elo_priv { struct usb_device *usbdev; struct delayed_work work; unsigned char buffer[ELO_SMARTSET_PACKET_SIZE]; }; static struct workqueue_struct *wq; static bool use_fw_quirk = true; module_param(use_fw_quirk, bool, S_IRUGO); MODULE_PARM_DESC(use_fw_quirk, "Do periodic pokes for broken M firmwares (default = true)"); static int elo_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; /* * ELO devices have one Button usage in GenDesk field, which makes * hid-input map it to BTN_LEFT; that confuses userspace, which then * considers the device to be a mouse/touchpad instead of touchscreen. */ clear_bit(BTN_LEFT, input->keybit); set_bit(BTN_TOUCH, input->keybit); set_bit(ABS_PRESSURE, input->absbit); input_set_abs_params(input, ABS_PRESSURE, 0, 256, 0, 0); return 0; } static void elo_process_data(struct input_dev *input, const u8 *data, int size) { int press; input_report_abs(input, ABS_X, (data[3] << 8) | data[2]); input_report_abs(input, ABS_Y, (data[5] << 8) | data[4]); press = 0; if (data[1] & 0x80) press = (data[7] << 8) | data[6]; input_report_abs(input, ABS_PRESSURE, press); if (data[1] & 0x03) { input_report_key(input, BTN_TOUCH, 1); input_sync(input); } if (data[1] & 0x04) input_report_key(input, BTN_TOUCH, 0); input_sync(input); } static int elo_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct hid_input *hidinput; if (!(hdev->claimed & HID_CLAIMED_INPUT) || list_empty(&hdev->inputs)) return 0; hidinput = list_first_entry(&hdev->inputs, struct hid_input, list); switch (report->id) { case 0: if (data[0] == 'T') { /* Mandatory ELO packet marker */ elo_process_data(hidinput->input, data, size); return 1; } break; default: /* unknown report */ /* Unknown report type; pass upstream */ hid_info(hdev, "unknown report type %d\n", report->id); break; } return 0; } static int elo_smartset_send_get(struct usb_device *dev, u8 command, void *data) { unsigned int pipe; u8 dir; if (command == ELO_SEND_SMARTSET_COMMAND) { pipe = usb_sndctrlpipe(dev, 0); dir = USB_DIR_OUT; } else if (command == ELO_GET_SMARTSET_RESPONSE) { pipe = usb_rcvctrlpipe(dev, 0); dir = USB_DIR_IN; } else return -EINVAL; return usb_control_msg(dev, pipe, command, dir | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, ELO_SMARTSET_PACKET_SIZE, ELO_SMARTSET_CMD_TIMEOUT); } static int elo_flush_smartset_responses(struct usb_device *dev) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), ELO_FLUSH_SMARTSET_RESPONSES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } static void elo_work(struct work_struct *work) { struct elo_priv *priv = container_of(work, struct elo_priv, work.work); struct usb_device *dev = priv->usbdev; unsigned char *buffer = priv->buffer; int ret; ret = elo_flush_smartset_responses(dev); if (ret < 0) { dev_err(&dev->dev, "initial FLUSH_SMARTSET_RESPONSES failed, error %d\n", ret); goto fail; } /* send Diagnostics command */ *buffer = ELO_DIAG; ret = elo_smartset_send_get(dev, ELO_SEND_SMARTSET_COMMAND, buffer); if (ret < 0) { dev_err(&dev->dev, "send Diagnostics Command failed, error %d\n", ret); goto fail; } /* get the result */ ret = elo_smartset_send_get(dev, ELO_GET_SMARTSET_RESPONSE, buffer); if (ret < 0) { dev_err(&dev->dev, "get Diagnostics Command response failed, error %d\n", ret); goto fail; } /* read the ack */ if (*buffer != 'A') { ret = elo_smartset_send_get(dev, ELO_GET_SMARTSET_RESPONSE, buffer); if (ret < 0) { dev_err(&dev->dev, "get acknowledge response failed, error %d\n", ret); goto fail; } } fail: ret = elo_flush_smartset_responses(dev); if (ret < 0) dev_err(&dev->dev, "final FLUSH_SMARTSET_RESPONSES failed, error %d\n", ret); queue_delayed_work(wq, &priv->work, ELO_PERIODIC_READ_INTERVAL); } /* * Not all Elo devices need the periodic HID descriptor reads. * Only firmware version M needs this. */ static bool elo_broken_firmware(struct usb_device *dev) { struct usb_device *hub = dev->parent; struct usb_device *child = NULL; u16 fw_lvl = le16_to_cpu(dev->descriptor.bcdDevice); u16 child_vid, child_pid; int i; if (!use_fw_quirk) return false; if (fw_lvl != 0x10d) return false; /* iterate sibling devices of the touch controller */ usb_hub_for_each_child(hub, i, child) { child_vid = le16_to_cpu(child->descriptor.idVendor); child_pid = le16_to_cpu(child->descriptor.idProduct); /* * If one of the devices below is present attached as a sibling of * the touch controller then this is a newer IBM 4820 monitor that * does not need the IBM-requested workaround if fw level is * 0x010d - aka 'M'. * No other HW can have this combination. */ if (child_vid==0x04b3) { switch (child_pid) { case 0x4676: /* 4820 21x Video */ case 0x4677: /* 4820 51x Video */ case 0x4678: /* 4820 2Lx Video */ case 0x4679: /* 4820 5Lx Video */ return false; } } } return true; } static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct elo_priv *priv; int ret; if (!hid_is_usb(hdev)) return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; INIT_DELAYED_WORK(&priv->work, elo_work); priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent)); hid_set_drvdata(hdev, priv); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } if (elo_broken_firmware(priv->usbdev)) { hid_info(hdev, "broken firmware found, installing workaround\n"); queue_delayed_work(wq, &priv->work, ELO_PERIODIC_READ_INTERVAL); } return 0; err_free: kfree(priv); return ret; } static void elo_remove(struct hid_device *hdev) { struct elo_priv *priv = hid_get_drvdata(hdev); hid_hw_stop(hdev); cancel_delayed_work_sync(&priv->work); kfree(priv); } static const struct hid_device_id elo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009), }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030), }, { } }; MODULE_DEVICE_TABLE(hid, elo_devices); static struct hid_driver elo_driver = { .name = "elo", .id_table = elo_devices, .probe = elo_probe, .remove = elo_remove, .raw_event = elo_raw_event, .input_configured = elo_input_configured, }; static int __init elo_driver_init(void) { int ret; wq = create_singlethread_workqueue("elousb"); if (!wq) return -ENOMEM; ret = hid_register_driver(&elo_driver); if (ret) destroy_workqueue(wq); return ret; } module_init(elo_driver_init); static void __exit elo_driver_exit(void) { hid_unregister_driver(&elo_driver); destroy_workqueue(wq); } module_exit(elo_driver_exit); MODULE_AUTHOR("Jiri Slaby <jslaby@suse.cz>"); MODULE_DESCRIPTION("HID driver for ELO usb touchscreen 4000/4500"); MODULE_LICENSE("GPL");
1 1 1 1 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright 2019 Hans de Goede <hdegoede@redhat.com> */ #include <linux/module.h> #include <linux/pm.h> #include <linux/usb.h> #include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_state_helper.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> #include <drm/drm_format_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> static bool eco_mode; module_param(eco_mode, bool, 0644); MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); #define DRIVER_NAME "gm12u320" #define DRIVER_DESC "Grain Media GM12U320 USB projector display" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 /* * The DLP has an actual width of 854 pixels, but that is not a multiple * of 8, breaking things left and right, so we export a width of 848. */ #define GM12U320_USER_WIDTH 848 #define GM12U320_REAL_WIDTH 854 #define GM12U320_HEIGHT 480 #define GM12U320_BLOCK_COUNT 20 #define GM12U320_ERR(fmt, ...) \ DRM_DEV_ERROR(gm12u320->dev.dev, fmt, ##__VA_ARGS__) #define MISC_RCV_EPT 1 #define DATA_RCV_EPT 2 #define DATA_SND_EPT 3 #define MISC_SND_EPT 4 #define DATA_BLOCK_HEADER_SIZE 84 #define DATA_BLOCK_CONTENT_SIZE 64512 #define DATA_BLOCK_FOOTER_SIZE 20 #define DATA_BLOCK_SIZE (DATA_BLOCK_HEADER_SIZE + \ DATA_BLOCK_CONTENT_SIZE + \ DATA_BLOCK_FOOTER_SIZE) #define DATA_LAST_BLOCK_CONTENT_SIZE 4032 #define DATA_LAST_BLOCK_SIZE (DATA_BLOCK_HEADER_SIZE + \ DATA_LAST_BLOCK_CONTENT_SIZE + \ DATA_BLOCK_FOOTER_SIZE) #define CMD_SIZE 31 #define READ_STATUS_SIZE 13 #define MISC_VALUE_SIZE 4 #define CMD_TIMEOUT 200 #define DATA_TIMEOUT 1000 #define IDLE_TIMEOUT 2000 #define FIRST_FRAME_TIMEOUT 2000 #define MISC_REQ_GET_SET_ECO_A 0xff #define MISC_REQ_GET_SET_ECO_B 0x35 /* Windows driver does once every second, with arg d = 1, other args 0 */ #define MISC_REQ_UNKNOWN1_A 0xff #define MISC_REQ_UNKNOWN1_B 0x38 /* Windows driver does this on init, with arg a, b = 0, c = 0xa0, d = 4 */ #define MISC_REQ_UNKNOWN2_A 0xa5 #define MISC_REQ_UNKNOWN2_B 0x00 struct gm12u320_device { struct drm_device dev; struct drm_simple_display_pipe pipe; struct drm_connector conn; unsigned char *cmd_buf; unsigned char *data_buf[GM12U320_BLOCK_COUNT]; struct { struct delayed_work work; struct mutex lock; struct drm_framebuffer *fb; struct drm_rect rect; int frame; int draw_status_timeout; struct iosys_map src_map; } fb_update; }; #define to_gm12u320(__dev) container_of(__dev, struct gm12u320_device, dev) static const char cmd_data[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x68, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x10, 0xff, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char cmd_draw[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0xfe, 0x00, 0x00, 0x00, 0xc0, 0xd1, 0x05, 0x00, 0x40, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char cmd_misc[CMD_SIZE] = { 0x55, 0x53, 0x42, 0x43, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x80, 0x01, 0x10, 0xfd, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const char data_block_header[DATA_BLOCK_HEADER_SIZE] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x15, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x01, 0x00, 0x00, 0xdb }; static const char data_last_block_header[DATA_BLOCK_HEADER_SIZE] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x20, 0x00, 0xc0, 0x0f, 0x00, 0x00, 0x01, 0x00, 0x00, 0xd7 }; static const char data_block_footer[DATA_BLOCK_FOOTER_SIZE] = { 0xfb, 0x14, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x4f }; static inline struct usb_device *gm12u320_to_usb_device(struct gm12u320_device *gm12u320) { return interface_to_usbdev(to_usb_interface(gm12u320->dev.dev)); } static int gm12u320_usb_alloc(struct gm12u320_device *gm12u320) { int i, block_size; const char *hdr; gm12u320->cmd_buf = drmm_kmalloc(&gm12u320->dev, CMD_SIZE, GFP_KERNEL); if (!gm12u320->cmd_buf) return -ENOMEM; for (i = 0; i < GM12U320_BLOCK_COUNT; i++) { if (i == GM12U320_BLOCK_COUNT - 1) { block_size = DATA_LAST_BLOCK_SIZE; hdr = data_last_block_header; } else { block_size = DATA_BLOCK_SIZE; hdr = data_block_header; } gm12u320->data_buf[i] = drmm_kzalloc(&gm12u320->dev, block_size, GFP_KERNEL); if (!gm12u320->data_buf[i]) return -ENOMEM; memcpy(gm12u320->data_buf[i], hdr, DATA_BLOCK_HEADER_SIZE); memcpy(gm12u320->data_buf[i] + (block_size - DATA_BLOCK_FOOTER_SIZE), data_block_footer, DATA_BLOCK_FOOTER_SIZE); } return 0; } static int gm12u320_misc_request(struct gm12u320_device *gm12u320, u8 req_a, u8 req_b, u8 arg_a, u8 arg_b, u8 arg_c, u8 arg_d) { struct usb_device *udev = gm12u320_to_usb_device(gm12u320); int ret, len; memcpy(gm12u320->cmd_buf, &cmd_misc, CMD_SIZE); gm12u320->cmd_buf[20] = req_a; gm12u320->cmd_buf[21] = req_b; gm12u320->cmd_buf[22] = arg_a; gm12u320->cmd_buf[23] = arg_b; gm12u320->cmd_buf[24] = arg_c; gm12u320->cmd_buf[25] = arg_d; /* Send request */ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, MISC_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) { GM12U320_ERR("Misc. req. error %d\n", ret); return -EIO; } /* Read value */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, MISC_RCV_EPT), gm12u320->cmd_buf, MISC_VALUE_SIZE, &len, DATA_TIMEOUT); if (ret || len != MISC_VALUE_SIZE) { GM12U320_ERR("Misc. value error %d\n", ret); return -EIO; } /* cmd_buf[0] now contains the read value, which we don't use */ /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, MISC_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, CMD_TIMEOUT); if (ret || len != READ_STATUS_SIZE) { GM12U320_ERR("Misc. status error %d\n", ret); return -EIO; } return 0; } static void gm12u320_32bpp_to_24bpp_packed(u8 *dst, u8 *src, int len) { while (len--) { *dst++ = *src++; *dst++ = *src++; *dst++ = *src++; src++; } } static void gm12u320_copy_fb_to_blocks(struct gm12u320_device *gm12u320) { int block, dst_offset, len, remain, ret, x1, x2, y1, y2; struct drm_framebuffer *fb; void *vaddr; u8 *src; mutex_lock(&gm12u320->fb_update.lock); if (!gm12u320->fb_update.fb) goto unlock; fb = gm12u320->fb_update.fb; x1 = gm12u320->fb_update.rect.x1; x2 = gm12u320->fb_update.rect.x2; y1 = gm12u320->fb_update.rect.y1; y2 = gm12u320->fb_update.rect.y2; vaddr = gm12u320->fb_update.src_map.vaddr; /* TODO: Use mapping abstraction properly */ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) { GM12U320_ERR("drm_gem_fb_begin_cpu_access err: %d\n", ret); goto put_fb; } src = vaddr + y1 * fb->pitches[0] + x1 * 4; x1 += (GM12U320_REAL_WIDTH - GM12U320_USER_WIDTH) / 2; x2 += (GM12U320_REAL_WIDTH - GM12U320_USER_WIDTH) / 2; for (; y1 < y2; y1++) { remain = 0; len = (x2 - x1) * 3; dst_offset = (y1 * GM12U320_REAL_WIDTH + x1) * 3; block = dst_offset / DATA_BLOCK_CONTENT_SIZE; dst_offset %= DATA_BLOCK_CONTENT_SIZE; if ((dst_offset + len) > DATA_BLOCK_CONTENT_SIZE) { remain = dst_offset + len - DATA_BLOCK_CONTENT_SIZE; len = DATA_BLOCK_CONTENT_SIZE - dst_offset; } dst_offset += DATA_BLOCK_HEADER_SIZE; len /= 3; gm12u320_32bpp_to_24bpp_packed( gm12u320->data_buf[block] + dst_offset, src, len); if (remain) { block++; dst_offset = DATA_BLOCK_HEADER_SIZE; gm12u320_32bpp_to_24bpp_packed( gm12u320->data_buf[block] + dst_offset, src + len * 4, remain / 3); } src += fb->pitches[0]; } drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); put_fb: drm_framebuffer_put(fb); gm12u320->fb_update.fb = NULL; unlock: mutex_unlock(&gm12u320->fb_update.lock); } static void gm12u320_fb_update_work(struct work_struct *work) { struct gm12u320_device *gm12u320 = container_of(to_delayed_work(work), struct gm12u320_device, fb_update.work); struct usb_device *udev = gm12u320_to_usb_device(gm12u320); int block, block_size, len; int ret = 0; gm12u320_copy_fb_to_blocks(gm12u320); for (block = 0; block < GM12U320_BLOCK_COUNT; block++) { if (block == GM12U320_BLOCK_COUNT - 1) block_size = DATA_LAST_BLOCK_SIZE; else block_size = DATA_BLOCK_SIZE; /* Send data command to device */ memcpy(gm12u320->cmd_buf, cmd_data, CMD_SIZE); gm12u320->cmd_buf[8] = block_size & 0xff; gm12u320->cmd_buf[9] = block_size >> 8; gm12u320->cmd_buf[20] = 0xfc - block * 4; gm12u320->cmd_buf[21] = block | (gm12u320->fb_update.frame << 7); ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) goto err; /* Send data block to device */ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->data_buf[block], block_size, &len, DATA_TIMEOUT); if (ret || len != block_size) goto err; /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, DATA_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, CMD_TIMEOUT); if (ret || len != READ_STATUS_SIZE) goto err; } /* Send draw command to device */ memcpy(gm12u320->cmd_buf, cmd_draw, CMD_SIZE); ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, DATA_SND_EPT), gm12u320->cmd_buf, CMD_SIZE, &len, CMD_TIMEOUT); if (ret || len != CMD_SIZE) goto err; /* Read status */ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, DATA_RCV_EPT), gm12u320->cmd_buf, READ_STATUS_SIZE, &len, gm12u320->fb_update.draw_status_timeout); if (ret || len != READ_STATUS_SIZE) goto err; gm12u320->fb_update.draw_status_timeout = CMD_TIMEOUT; gm12u320->fb_update.frame = !gm12u320->fb_update.frame; /* * We must draw a frame every 2s otherwise the projector * switches back to showing its logo. */ queue_delayed_work(system_long_wq, &gm12u320->fb_update.work, msecs_to_jiffies(IDLE_TIMEOUT)); return; err: /* Do not log errors caused by module unload or device unplug */ if (ret != -ENODEV && ret != -ECONNRESET && ret != -ESHUTDOWN) GM12U320_ERR("Frame update error: %d\n", ret); } static void gm12u320_fb_mark_dirty(struct drm_framebuffer *fb, const struct iosys_map *map, struct drm_rect *dirty) { struct gm12u320_device *gm12u320 = to_gm12u320(fb->dev); struct drm_framebuffer *old_fb = NULL; bool wakeup = false; mutex_lock(&gm12u320->fb_update.lock); if (gm12u320->fb_update.fb != fb) { old_fb = gm12u320->fb_update.fb; drm_framebuffer_get(fb); gm12u320->fb_update.fb = fb; gm12u320->fb_update.rect = *dirty; gm12u320->fb_update.src_map = *map; wakeup = true; } else { struct drm_rect *rect = &gm12u320->fb_update.rect; rect->x1 = min(rect->x1, dirty->x1); rect->y1 = min(rect->y1, dirty->y1); rect->x2 = max(rect->x2, dirty->x2); rect->y2 = max(rect->y2, dirty->y2); } mutex_unlock(&gm12u320->fb_update.lock); if (wakeup) mod_delayed_work(system_long_wq, &gm12u320->fb_update.work, 0); if (old_fb) drm_framebuffer_put(old_fb); } static void gm12u320_stop_fb_update(struct gm12u320_device *gm12u320) { struct drm_framebuffer *old_fb; cancel_delayed_work_sync(&gm12u320->fb_update.work); mutex_lock(&gm12u320->fb_update.lock); old_fb = gm12u320->fb_update.fb; gm12u320->fb_update.fb = NULL; iosys_map_clear(&gm12u320->fb_update.src_map); mutex_unlock(&gm12u320->fb_update.lock); drm_framebuffer_put(old_fb); } static int gm12u320_set_ecomode(struct gm12u320_device *gm12u320) { return gm12u320_misc_request(gm12u320, MISC_REQ_GET_SET_ECO_A, MISC_REQ_GET_SET_ECO_B, 0x01 /* set */, eco_mode ? 0x01 : 0x00, 0x00, 0x01); } /* ------------------------------------------------------------------ */ /* gm12u320 connector */ /* * We use fake EDID info so that userspace know that it is dealing with * an Acer projector, rather then listing this as an "unknown" monitor. * Note this assumes this driver is only ever used with the Acer C120, if we * add support for other devices the vendor and model should be parameterized. */ static const struct edid gm12u320_edid = { .header = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }, .mfg_id = { 0x04, 0x72 }, /* "ACR" */ .prod_code = { 0x20, 0xc1 }, /* C120h */ .serial = 0xaa55aa55, .mfg_week = 1, .mfg_year = 16, .version = 1, /* EDID 1.3 */ .revision = 3, /* EDID 1.3 */ .input = 0x08, /* Analog input */ .features = 0x0a, /* Pref timing in DTD 1 */ .standard_timings = { { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 } }, .detailed_timings = { { .pixel_clock = 3383, /* hactive = 848, hblank = 256 */ .data.pixel_data.hactive_lo = 0x50, .data.pixel_data.hblank_lo = 0x00, .data.pixel_data.hactive_hblank_hi = 0x31, /* vactive = 480, vblank = 28 */ .data.pixel_data.vactive_lo = 0xe0, .data.pixel_data.vblank_lo = 0x1c, .data.pixel_data.vactive_vblank_hi = 0x10, /* hsync offset 40 pw 128, vsync offset 1 pw 4 */ .data.pixel_data.hsync_offset_lo = 0x28, .data.pixel_data.hsync_pulse_width_lo = 0x80, .data.pixel_data.vsync_offset_pulse_width_lo = 0x14, .data.pixel_data.hsync_vsync_offset_pulse_width_hi = 0x00, /* Digital separate syncs, hsync+, vsync+ */ .data.pixel_data.misc = 0x1e, }, { .pixel_clock = 0, .data.other_data.type = 0xfd, /* Monitor ranges */ .data.other_data.data.range.min_vfreq = 59, .data.other_data.data.range.max_vfreq = 61, .data.other_data.data.range.min_hfreq_khz = 29, .data.other_data.data.range.max_hfreq_khz = 32, .data.other_data.data.range.pixel_clock_mhz = 4, /* 40 MHz */ .data.other_data.data.range.flags = 0, .data.other_data.data.range.formula.cvt = { 0xa0, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }, }, { .pixel_clock = 0, .data.other_data.type = 0xfc, /* Model string */ .data.other_data.data.str.str = { 'P', 'r', 'o', 'j', 'e', 'c', 't', 'o', 'r', '\n', ' ', ' ', ' ' }, }, { .pixel_clock = 0, .data.other_data.type = 0xfe, /* Unspecified text / padding */ .data.other_data.data.str.str = { '\n', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, } }, .checksum = 0x13, }; static int gm12u320_conn_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; int count; drm_edid = drm_edid_alloc(&gm12u320_edid, sizeof(gm12u320_edid)); drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); return count; } static const struct drm_connector_helper_funcs gm12u320_conn_helper_funcs = { .get_modes = gm12u320_conn_get_modes, }; static const struct drm_connector_funcs gm12u320_conn_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; static int gm12u320_conn_init(struct gm12u320_device *gm12u320) { drm_connector_helper_add(&gm12u320->conn, &gm12u320_conn_helper_funcs); return drm_connector_init(&gm12u320->dev, &gm12u320->conn, &gm12u320_conn_funcs, DRM_MODE_CONNECTOR_VGA); } /* ------------------------------------------------------------------ */ /* gm12u320 (simple) display pipe */ static void gm12u320_pipe_enable(struct drm_simple_display_pipe *pipe, struct drm_crtc_state *crtc_state, struct drm_plane_state *plane_state) { struct drm_rect rect = { 0, 0, GM12U320_USER_WIDTH, GM12U320_HEIGHT }; struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); gm12u320->fb_update.draw_status_timeout = FIRST_FRAME_TIMEOUT; gm12u320_fb_mark_dirty(plane_state->fb, &shadow_plane_state->data[0], &rect); } static void gm12u320_pipe_disable(struct drm_simple_display_pipe *pipe) { struct gm12u320_device *gm12u320 = to_gm12u320(pipe->crtc.dev); gm12u320_stop_fb_update(gm12u320); } static void gm12u320_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state) { struct drm_plane_state *state = pipe->plane.state; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); struct drm_rect rect; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) gm12u320_fb_mark_dirty(state->fb, &shadow_plane_state->data[0], &rect); } static const struct drm_simple_display_pipe_funcs gm12u320_pipe_funcs = { .enable = gm12u320_pipe_enable, .disable = gm12u320_pipe_disable, .update = gm12u320_pipe_update, DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, }; static const uint32_t gm12u320_pipe_formats[] = { DRM_FORMAT_XRGB8888, }; static const uint64_t gm12u320_pipe_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; DEFINE_DRM_GEM_FOPS(gm12u320_fops); static const struct drm_driver gm12u320_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = DRIVER_NAME, .desc = DRIVER_DESC, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .fops = &gm12u320_fops, DRM_GEM_SHMEM_DRIVER_OPS, DRM_FBDEV_SHMEM_DRIVER_OPS, }; static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; static int gm12u320_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct gm12u320_device *gm12u320; struct drm_device *dev; struct device *dma_dev; int ret; /* * The gm12u320 presents itself to the system as 2 usb mass-storage * interfaces, we only care about / need the first one. */ if (interface->cur_altsetting->desc.bInterfaceNumber != 0) return -ENODEV; gm12u320 = devm_drm_dev_alloc(&interface->dev, &gm12u320_drm_driver, struct gm12u320_device, dev); if (IS_ERR(gm12u320)) return PTR_ERR(gm12u320); dev = &gm12u320->dev; dma_dev = usb_intf_get_dma_device(interface); if (dma_dev) { drm_dev_set_dma_dev(dev, dma_dev); put_device(dma_dev); } else { drm_warn(dev, "buffer sharing not supported"); /* not an error */ } INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work); mutex_init(&gm12u320->fb_update.lock); ret = drmm_mode_config_init(dev); if (ret) return ret; dev->mode_config.min_width = GM12U320_USER_WIDTH; dev->mode_config.max_width = GM12U320_USER_WIDTH; dev->mode_config.min_height = GM12U320_HEIGHT; dev->mode_config.max_height = GM12U320_HEIGHT; dev->mode_config.funcs = &gm12u320_mode_config_funcs; ret = gm12u320_usb_alloc(gm12u320); if (ret) return ret; ret = gm12u320_set_ecomode(gm12u320); if (ret) return ret; ret = gm12u320_conn_init(gm12u320); if (ret) return ret; ret = drm_simple_display_pipe_init(&gm12u320->dev, &gm12u320->pipe, &gm12u320_pipe_funcs, gm12u320_pipe_formats, ARRAY_SIZE(gm12u320_pipe_formats), gm12u320_pipe_modifiers, &gm12u320->conn); if (ret) return ret; drm_mode_config_reset(dev); usb_set_intfdata(interface, dev); ret = drm_dev_register(dev, 0); if (ret) return ret; drm_client_setup(dev, NULL); return 0; } static void gm12u320_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); } static int gm12u320_suspend(struct usb_interface *interface, pm_message_t message) { struct drm_device *dev = usb_get_intfdata(interface); return drm_mode_config_helper_suspend(dev); } static int gm12u320_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); struct gm12u320_device *gm12u320 = to_gm12u320(dev); gm12u320_set_ecomode(gm12u320); return drm_mode_config_helper_resume(dev); } static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1de1, 0xc102) }, {}, }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver gm12u320_usb_driver = { .name = "gm12u320", .probe = gm12u320_usb_probe, .disconnect = gm12u320_usb_disconnect, .id_table = id_table, .suspend = pm_ptr(gm12u320_suspend), .resume = pm_ptr(gm12u320_resume), .reset_resume = pm_ptr(gm12u320_resume), }; module_usb_driver(gm12u320_usb_driver); MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("GM12U320 driver for USB projectors"); MODULE_LICENSE("GPL");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:net,iface type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 nomatch flag support added */ /* 2 /0 support added */ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ /* 6 skbinfo support added */ /* 7 interface wildcard support added */ #define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Type specific function prefix */ #define HTYPE hash_netiface #define IP_SET_HASH_WITH_NETS #define IP_SET_HASH_WITH_MULTI #define IP_SET_HASH_WITH_NET0 #define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ) /* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; /* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, const struct hash_netiface4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) { elem->ip &= ip_set_netmask(cidr); elem->cidr = cidr; } static bool hash_netiface4_data_list(struct sk_buff *skb, const struct hash_netiface4_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface4_data_next(struct hash_netiface4_elem *next, const struct hash_netiface4_elem *d) { next->ip = d->ip; } #define MTYPE hash_netiface4 #define HOST_MASK 32 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static const char *get_physindev_name(const struct sk_buff *skb, struct net *net) { struct net_device *dev = nf_bridge_get_physindev(skb, net); return dev ? dev->name : NULL; } static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); return dev ? dev->name : NULL; } #endif static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_netiface4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip_to < ip) swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } if (retried) ip = ntohl(h->next.ip); do { i++; e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_netiface4_data_next(&h->next, &e); return -ERANGE; } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } while (ip++ < ip_to); return ret; } /* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; }; struct hash_netiface6_elem { union nf_inet_addr ip; u8 physdev; u8 cidr; u8 nomatch; u8 elem; u8 wildcard; char iface[IFNAMSIZ]; }; /* Common functions */ static bool hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, const struct hash_netiface6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && (ip1->wildcard ? strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : strcmp(ip1->iface, ip2->iface) == 0); } static int hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) { ip6_netmask(&elem->ip, cidr); elem->cidr = cidr; } static bool hash_netiface6_data_list(struct sk_buff *skb, const struct hash_netiface6_elem *data) { u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_netiface6_data_next(struct hash_netiface6_elem *next, const struct hash_netiface6_elem *d) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_netiface6 #define HOST_MASK 128 #define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct hash_netiface6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) : get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRSCPY(e.iface, eiface); e.physdev = 1; #endif } else { STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); } if (strlen(e.iface) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IFACE] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ip6_netmask(&e.ip, e.cidr); nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) e.wildcard = 1; } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } static struct ip_set_type hash_netiface_type __read_mostly = { .name = "hash:net,iface", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_netiface_init(void) { return ip_set_type_register(&hash_netiface_type); } static void __exit hash_netiface_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_netiface_type); } module_init(hash_netiface_init); module_exit(hash_netiface_fini);
93 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner */ #include "bat_v.h" #include "main.h" #include <linux/atomic.h> #include <linux/cache.h> #include <linux/errno.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "originator.h" static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if) { batadv_v_elp_iface_activate(primary_if, hard_iface); batadv_hardif_put(primary_if); } /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can * set the interface as ACTIVE right away, without any risk of race * condition */ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) hard_iface->if_status = BATADV_IF_ACTIVE; } static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) { int ret; ret = batadv_v_elp_iface_enable(hard_iface); if (ret < 0) return ret; ret = batadv_v_ogm_iface_enable(hard_iface); if (ret < 0) batadv_v_elp_iface_disable(hard_iface); return ret; } static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) { batadv_v_ogm_iface_disable(hard_iface); batadv_v_elp_iface_disable(hard_iface); } static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) { batadv_v_elp_primary_iface_set(hard_iface); batadv_v_ogm_primary_iface_set(hard_iface); } /** * batadv_v_iface_update_mac() - react to hard-interface MAC address change * @hard_iface: the modified interface * * If the modified interface is the primary one, update the originator * address in the ELP and OGM messages to reflect the new MAC address. */ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if != hard_iface) goto out; batadv_v_primary_iface_set(hard_iface); out: batadv_hardif_put(primary_if); } static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); } /** * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @hardif_neigh: Neighbour to dump * * Return: Error code, or 0 on success */ static int batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_hardif_neigh_node *hardif_neigh) { void *hdr; unsigned int last_seen_msecs; u32 throughput; last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); throughput = throughput * 100; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, hardif_neigh->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, hardif_neigh->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hardif_neigh->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs) || nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_v_neigh_dump_hardif() - Dump the neighbours of a hard interface into * a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the mesh interface information * @hard_iface: The hard interface to be dumped * @idx_s: Entries to be skipped * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface, int *idx_s) { struct batadv_hardif_neigh_node *hardif_neigh; int idx = 0; hlist_for_each_entry_rcu(hardif_neigh, &hard_iface->neigh_list, list) { if (idx++ < *idx_s) continue; if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { *idx_s = idx - 1; return -EMSGSIZE; } } *idx_s = 0; return 0; } /** * batadv_v_neigh_dump() - Dump the neighbours of a hard interface into a * message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the mesh interface information * @single_hardif: Limit dumping to this hard interface */ static void batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *single_hardif) { struct batadv_hard_iface *hard_iface; struct list_head *iter; int i_hardif = 0; int i_hardif_s = cb->args[0]; int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; rcu_read_lock(); if (single_hardif) { if (i_hardif_s == 0) { if (batadv_v_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, single_hardif, &idx) == 0) i_hardif++; } } else { netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) { if (i_hardif++ < i_hardif_s) continue; if (batadv_v_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, hard_iface, &idx)) { i_hardif--; break; } } } rcu_read_unlock(); cb->args[0] = i_hardif; cb->args[1] = idx; } /** * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour * @best: Is the best originator * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, bool best) { struct batadv_neigh_ifinfo *n_ifinfo; unsigned int last_seen_msecs; u32 throughput; void *hdr; n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); if (!n_ifinfo) return 0; throughput = n_ifinfo->bat_v.throughput * 100; batadv_neigh_ifinfo_put(n_ifinfo); last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); if (if_outgoing != BATADV_IF_DEFAULT && if_outgoing != neigh_node->if_incoming) return 0; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, neigh_node->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, neigh_node->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, neigh_node->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_v_orig_dump_entry() - Dump an originator entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, int *sub_s) { struct batadv_neigh_node *neigh_node_best; struct batadv_neigh_node *neigh_node; int sub = 0; bool best; neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); if (!neigh_node_best) goto out; hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { if (sub++ < *sub_s) continue; best = (neigh_node == neigh_node_best); if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, if_outgoing, orig_node, neigh_node, best)) { batadv_neigh_node_put(neigh_node_best); *sub_s = sub - 1; return -EMSGSIZE; } } out: batadv_neigh_node_put(neigh_node_best); *sub_s = 0; return 0; } /** * batadv_v_orig_dump_bucket() - Dump an originator bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped * @sub: Number of sub entries to be skipped * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct hlist_head *head, int *idx_s, int *sub) { struct batadv_orig_node *orig_node; int idx = 0; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (idx++ < *idx_s) continue; if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, if_outgoing, orig_node, sub)) { rcu_read_unlock(); *idx_s = idx - 1; return -EMSGSIZE; } } rcu_read_unlock(); *idx_s = 0; *sub = 0; return 0; } /** * batadv_v_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the mesh interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; int bucket = cb->args[0]; int idx = cb->args[1]; int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; while (bucket < hash->size) { head = &hash->table[bucket]; if (batadv_v_orig_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, bat_priv, if_outgoing, head, &idx, &sub)) break; bucket++; } cb->args[0] = bucket; cb->args[1] = idx; cb->args[2] = sub; } static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; batadv_neigh_ifinfo_put(ifinfo2); err_ifinfo2: batadv_neigh_ifinfo_put(ifinfo1); err_ifinfo1: return ret; } static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; u32 threshold; bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; threshold = ifinfo1->bat_v.throughput - threshold; ret = ifinfo2->bat_v.throughput > threshold; batadv_neigh_ifinfo_put(ifinfo2); err_ifinfo2: batadv_neigh_ifinfo_put(ifinfo1); err_ifinfo1: return ret; } /** * batadv_v_init_sel_class() - initialize GW selection class * @bat_priv: the bat priv with all the mesh interface information */ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) { /* set default throughput difference threshold to 5Mbps */ atomic_set(&bat_priv->gw.sel_class, 50); } /** * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW * @gw_node: the GW to retrieve the metric for * @bw: the pointer where the metric will be stored. The metric is computed as * the minimum between the GW advertised throughput and the path throughput to * it in the mesh * * Return: 0 on success, -1 on failure */ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; int ret = -1; orig_node = gw_node->orig_node; router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) goto out; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto out; /* the GW metric is computed as the minimum between the path throughput * to reach the GW itself and the advertised bandwidth. * This gives us an approximation of the effective throughput that the * client can expect via this particular GW node */ *bw = router_ifinfo->bat_v.throughput; *bw = min_t(u32, *bw, gw_node->bandwidth_down); ret = 0; out: batadv_neigh_node_put(router); batadv_neigh_ifinfo_put(router_ifinfo); return ret; } /** * batadv_v_gw_get_best_gw_node() - retrieve the best GW node * @bat_priv: the bat priv with all the mesh interface information * * Return: the GW node having the best GW-metric, NULL if no GW is known */ static struct batadv_gw_node * batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw = NULL; u32 max_bw = 0, bw; rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { if (!kref_get_unless_zero(&gw_node->refcount)) continue; if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) goto next; if (curr_gw && bw <= max_bw) goto next; batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); max_bw = bw; next: batadv_gw_node_put(gw_node); } rcu_read_unlock(); return curr_gw; } /** * batadv_v_gw_is_eligible() - check if a originator would be selected as GW * @bat_priv: the bat priv with all the mesh interface information * @curr_gw_orig: originator representing the currently selected GW * @orig_node: the originator representing the new candidate * * Return: true if orig_node can be selected as current GW, false otherwise */ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node) { struct batadv_gw_node *curr_gw, *orig_gw = NULL; u32 gw_throughput, orig_throughput, threshold; bool ret = false; threshold = atomic_read(&bat_priv->gw.sel_class); curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); if (!curr_gw) { ret = true; goto out; } if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { ret = true; goto out; } orig_gw = batadv_gw_node_get(bat_priv, orig_node); if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) goto out; if (orig_throughput < gw_throughput) goto out; if ((orig_throughput - gw_throughput) < threshold) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", gw_throughput, orig_throughput); ret = true; out: batadv_gw_node_put(curr_gw); batadv_gw_node_put(orig_gw); return ret; } /** * batadv_v_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options * @bat_priv: The bat priv with all the mesh interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success */ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_gw_node *gw_node) { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; struct batadv_gw_node *curr_gw = NULL; int ret = 0; void *hdr; router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); if (!router) goto out; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto out; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); if (!hdr) { ret = -ENOBUFS; goto out; } genl_dump_check_consistent(cb, hdr); ret = -EMSGSIZE; if (curr_gw == gw_node) { if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { genlmsg_cancel(msg, hdr); goto out; } } if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, gw_node->orig_node->orig)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, router_ifinfo->bat_v.throughput)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, router->if_incoming->net_dev->name)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, router->if_incoming->net_dev->ifindex)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, gw_node->bandwidth_down)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { genlmsg_cancel(msg, hdr); goto out; } genlmsg_end(msg, hdr); ret = 0; out: batadv_gw_node_put(curr_gw); batadv_neigh_ifinfo_put(router_ifinfo); batadv_neigh_node_put(router); return ret; } /** * batadv_v_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the mesh interface information */ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) { int portid = NETLINK_CB(cb->skb).portid; struct batadv_gw_node *gw_node; int idx_skip = cb->args[0]; int idx = 0; spin_lock_bh(&bat_priv->gw.list_lock); cb->seq = bat_priv->gw.generation << 1 | 1; hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) { if (idx++ < idx_skip) continue; if (batadv_v_gw_dump_entry(msg, portid, cb, bat_priv, gw_node)) { idx_skip = idx - 1; goto unlock; } } idx_skip = idx; unlock: spin_unlock_bh(&bat_priv->gw.list_lock); cb->args[0] = idx_skip; } static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { .activate = batadv_v_iface_activate, .enable = batadv_v_iface_enable, .disable = batadv_v_iface_disable, .update_mac = batadv_v_iface_update_mac, .primary_set = batadv_v_primary_iface_set, }, .neigh = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, .dump = batadv_v_neigh_dump, }, .orig = { .dump = batadv_v_orig_dump, }, .gw = { .init_sel_class = batadv_v_init_sel_class, .sel_class_max = U32_MAX, .get_best_gw_node = batadv_v_gw_get_best_gw_node, .is_eligible = batadv_v_gw_is_eligible, .dump = batadv_v_gw_dump, }, }; /** * batadv_v_hardif_init() - initialize the algorithm specific fields in the * hard-interface object * @hard_iface: the hard-interface to initialize */ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) { /* enable link throughput auto-detection by setting the throughput * override to zero */ atomic_set(&hard_iface->bat_v.throughput_override, 0); atomic_set(&hard_iface->bat_v.elp_interval, 500); hard_iface->bat_v.aggr_len = 0; skb_queue_head_init(&hard_iface->bat_v.aggr_list); INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq, batadv_v_ogm_aggr_work); } /** * batadv_v_mesh_init() - initialize the B.A.T.M.A.N. V private resources for a * mesh * @bat_priv: the object representing the mesh interface to initialise * * Return: 0 on success or a negative error code otherwise */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { int ret = 0; ret = batadv_v_ogm_init(bat_priv); if (ret < 0) return ret; return 0; } /** * batadv_v_mesh_free() - free the B.A.T.M.A.N. V private resources for a mesh * @bat_priv: the object representing the mesh interface to free */ void batadv_v_mesh_free(struct batadv_priv *bat_priv) { batadv_v_ogm_free(bat_priv); } /** * batadv_v_init() - B.A.T.M.A.N. V initialization function * * Description: Takes care of initializing all the subcomponents. * It is invoked upon module load only. * * Return: 0 on success or a negative error code otherwise */ int __init batadv_v_init(void) { int ret; /* B.A.T.M.A.N. V echo location protocol packet */ ret = batadv_recv_handler_register(BATADV_ELP, batadv_v_elp_packet_recv); if (ret < 0) return ret; ret = batadv_recv_handler_register(BATADV_OGM2, batadv_v_ogm_packet_recv); if (ret < 0) goto elp_unregister; ret = batadv_algo_register(&batadv_batman_v); if (ret < 0) goto ogm_unregister; return ret; ogm_unregister: batadv_recv_handler_unregister(BATADV_OGM2); elp_unregister: batadv_recv_handler_unregister(BATADV_ELP); return ret; }
252 129 2592 274 71 1895 48 2 1 1 1 1 1 1 151 81 135 11 144 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_DCACHE_H #define __LINUX_DCACHE_H #include <linux/atomic.h> #include <linux/list.h> #include <linux/math.h> #include <linux/rculist.h> #include <linux/rculist_bl.h> #include <linux/spinlock.h> #include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> #include <linux/wait.h> struct path; struct file; struct vfsmount; /* * linux/include/linux/dcache.h * * Dirent cache data structures * * (C) Copyright 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ #define IS_ROOT(x) ((x) == (x)->d_parent) /* The hash is always the low bits of hash_len */ #ifdef __LITTLE_ENDIAN #define HASH_LEN_DECLARE u32 hash; u32 len #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) #else #define HASH_LEN_DECLARE u32 len; u32 hash #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) #endif /* * "quick string" -- eases parameter passing, but more importantly * saves "metadata" about the string (ie length and the hash). * * hash comes first so it snuggles against d_parent in the * dentry. */ struct qstr { union { struct { HASH_LEN_DECLARE; }; u64 hash_len; }; const unsigned char *name; }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } #define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) #define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; extern const struct qstr dotdot_name; /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the * large memory footprint increase). */ #ifdef CONFIG_64BIT # define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP # define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else # define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif #define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) union shortname_store { unsigned char string[DNAME_INLINE_LEN]; unsigned long words[DNAME_INLINE_WORDS]; }; #define d_lock d_lockref.lock #define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ union { struct qstr __d_name; /* for use ONLY in fs/dcache.c */ const struct qstr d_name; }; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ /* --- cacheline 2 boundary (128 bytes) --- */ struct lockref d_lockref; /* per-dentry lock and refcount * keep separate from RCU lookup area if * possible! */ union { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; struct hlist_node d_sib; /* child of parent list */ struct hlist_head d_children; /* our children */ /* * d_alias and d_rcu can share memory */ union { struct hlist_node d_alias; /* inode alias list */ struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; }; /* * dentry->d_lock spinlock nesting subclasses: * * 0: normal * 1: nested */ enum dentry_d_lock_class { DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ DENTRY_D_LOCK_NESTED }; enum d_real_type { D_REAL_DATA, D_REAL_METADATA, }; struct dentry_operations { int (*d_revalidate)(struct inode *, const struct qstr *, struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); bool (*d_unalias_trylock)(const struct dentry *); void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/locking.rst. Keep it updated! * * FUrther descriptions are found in Documentation/filesystems/vfs.rst. * Keep it updated too! */ /* d_flags entries */ enum dentry_flags { DCACHE_OP_HASH = BIT(0), DCACHE_OP_COMPARE = BIT(1), DCACHE_OP_REVALIDATE = BIT(2), DCACHE_OP_DELETE = BIT(3), DCACHE_OP_PRUNE = BIT(4), /* * This dentry is possibly not currently connected to the dcache tree, * in which case its parent will either be itself, or will have this * flag as well. nfsd will not use a dentry with this bit set, but will * first endeavour to clear the bit either by discovering that it is * connected, or by performing lookup operations. Any filesystem which * supports nfsd_operations MUST have a lookup function which, if it * finds a directory inode with a DCACHE_DISCONNECTED dentry, will * d_move that dentry into place and return that dentry rather than the * passed one, typically using d_splice_alias. */ DCACHE_DISCONNECTED = BIT(5), DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ DCACHE_CANT_MOUNT = BIT(8), DCACHE_GENOCIDE = BIT(9), DCACHE_SHRINK_LIST = BIT(10), DCACHE_OP_WEAK_REVALIDATE = BIT(11), /* * this dentry has been "silly renamed" and has to be deleted on the * last dput() */ DCACHE_NFSFS_RENAMED = BIT(12), DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ DCACHE_DENTRY_KILLED = BIT(14), DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ DCACHE_LRU_LIST = BIT(18), DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ DCACHE_OP_REAL = BIT(23), DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ DCACHE_DENTRY_CURSOR = BIT(25), DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ }; #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) extern seqlock_t rename_lock; /* * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); /* weird procfs mess; *NOT* exported */ extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *, const struct dentry_operations *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); extern struct dentry *d_find_alias_rcu(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int path_has_submounts(const struct path *); /* * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); extern void d_add(struct dentry *, struct inode *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_lockref.count; } ino_t d_parent_ino(struct dentry *dentry); /* * helper function for dentry_operations.d_dname() members */ extern __printf(3, 4) char *dynamic_dname(char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *dentry_path_raw(const struct dentry *, char *, int); extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ /** * dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a live dentry, increment the reference count and return the dentry. * Caller must hold @dentry->d_lock. Making sure that dentry is alive is * caller's resonsibility. There are many conditions sufficient to guarantee * that; e.g. anything with non-negative refcount is alive, so's anything * hashed, anything positive, anyone's parent, etc. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { dentry->d_lockref.count++; return dentry; } /** * dget - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be * destroyed when it has references. Conversely, a dentry with * no references can disappear for any number of reasons, starting * with memory pressure. In other words, that primitive is * used to clone an existing reference; using it on something with * zero refcount is a bug. * * NOTE: it will spin if @dentry->d_lock is held. From the deadlock * avoidance point of view it is equivalent to spin_lock()/increment * refcount/spin_unlock(), so calling it under @dentry->d_lock is * always a bug; so's calling it under ->d_lock on any of its descendents. * */ static inline struct dentry *dget(struct dentry *dentry) { if (dentry) lockref_get(&dentry->d_lockref); return dentry; } extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed * @dentry: entry to check * * Returns true if the dentry passed is not currently hashed. */ static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(const struct dentry *dentry) { return d_unhashed(dentry) && !IS_ROOT(dentry); } static inline int cant_mount(const struct dentry *dentry) { return (dentry->d_flags & DCACHE_CANT_MOUNT); } static inline void dont_mount(struct dentry *dentry) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_CANT_MOUNT; spin_unlock(&dentry->d_lock); } extern void __d_lookup_unhash_wake(struct dentry *dentry); static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } static inline void d_lookup_done(struct dentry *dentry) { if (unlikely(d_in_lookup(dentry))) __d_lookup_unhash_wake(dentry); } extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MANAGED_DENTRY; } static inline bool d_mountpoint(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MOUNTED; } /* * Directory cache entry type accessor functions. */ static inline unsigned __d_entry_type(const struct dentry *dentry) { return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_MISS_TYPE; } static inline bool d_is_whiteout(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_WHITEOUT_TYPE; } static inline bool d_can_lookup(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE; } static inline bool d_is_autodir(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE; } static inline bool d_is_dir(const struct dentry *dentry) { return d_can_lookup(dentry) || d_is_autodir(dentry); } static inline bool d_is_symlink(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE; } static inline bool d_is_reg(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_REGULAR_TYPE; } static inline bool d_is_special(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SPECIAL_TYPE; } static inline bool d_is_file(const struct dentry *dentry) { return d_is_reg(dentry) || d_is_special(dentry); } static inline bool d_is_negative(const struct dentry *dentry) { // TODO: check d_is_whiteout(dentry) also. return d_is_miss(dentry); } static inline bool d_flags_negative(unsigned flags) { return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; } static inline bool d_is_positive(const struct dentry *dentry) { return !d_is_negative(dentry); } /** * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents either an absent name or a name that * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent * a true miss, a whiteout that isn't represented by a 0,0 chardev or a * fallthrough marker in an opaque directory. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. (3) The dentry may have something attached to ->d_lower and the * type field of the flags may be set to something other than miss or whiteout. */ static inline bool d_really_is_negative(const struct dentry *dentry) { return dentry->d_inode == NULL; } /** * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents a name that maps to an inode * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if * that is represented on medium as a 0,0 chardev. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. */ static inline bool d_really_is_positive(const struct dentry *dentry) { return dentry->d_inode != NULL; } static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } unsigned long vfs_pressure_ratio(unsigned long val); /** * d_inode - Get the actual inode of this dentry * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode(const struct dentry *dentry) { return dentry->d_inode; } /** * d_inode_rcu - Get the actual inode of this dentry with READ_ONCE() * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode_rcu(const struct dentry *dentry) { return READ_ONCE(dentry->d_inode); } /** * d_backing_inode - Get upper or lower inode we should be using * @upper: The upper layer * * This is the helper that should be used to get at the inode that will be used * if this dentry were to be opened as a file. The inode may be on the upper * dentry or it may be on a lower dentry pinned by the upper. * * Normal filesystems should not use this to access their own inodes. */ static inline struct inode *d_backing_inode(const struct dentry *upper) { struct inode *inode = upper->d_inode; return inode; } /** * d_real - Return the real dentry * @dentry: the dentry to query * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) return dentry->d_op->d_real(dentry, type); else return dentry; } /** * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { struct qstr name; union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); static inline struct dentry *d_first_child(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib); } static inline struct dentry *d_next_sibling(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); } void set_default_d_op(struct super_block *, const struct dentry_operations *); #endif /* __LINUX_DCACHE_H */
12 19 128 1 40 277 289 128 14 26 206 193 191 191 58 23 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM block #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BLOCK_H #include <linux/blktrace_api.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/tracepoint.h> #include <uapi/linux/ioprio.h> #define RWBS_LEN 10 #define IOPRIO_CLASS_STRINGS \ { IOPRIO_CLASS_NONE, "none" }, \ { IOPRIO_CLASS_RT, "rt" }, \ { IOPRIO_CLASS_BE, "be" }, \ { IOPRIO_CLASS_IDLE, "idle" }, \ { IOPRIO_CLASS_INVALID, "invalid"} #ifdef CONFIG_BUFFER_HEAD DECLARE_EVENT_CLASS(block_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh), TP_STRUCT__entry ( __field( dev_t, dev ) __field( sector_t, sector ) __field( size_t, size ) ), TP_fast_assign( __entry->dev = bh->b_bdev->bd_dev; __entry->sector = bh->b_blocknr; __entry->size = bh->b_size; ), TP_printk("%d,%d sector=%llu size=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->sector, __entry->size ) ); /** * block_touch_buffer - mark a buffer accessed * @bh: buffer_head being touched * * Called from touch_buffer(). */ DEFINE_EVENT(block_buffer, block_touch_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_dirty_buffer - mark a buffer dirty * @bh: buffer_head being dirtied * * Called from mark_buffer_dirty(). */ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); #endif /* CONFIG_BUFFER_HEAD */ /** * block_rq_requeue - place block IO request back on a queue * @rq: block IO operation request * * The block operation request @rq is being placed back into queue * @q. For some reason the request was not completed and needs to be * put back in the queue. */ TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), 0) ); DECLARE_EVENT_CLASS(block_rq_completion, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int , error ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error) ); /** * block_rq_complete - block IO operation completed by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If * the @rq->bio is %NULL, then there is absolutely no additional work to * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ DEFINE_EVENT(block_rq_completion, block_rq_complete, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); /** * block_rq_error - block IO operation error reported by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_error tracepoint event indicates that some portion * of operation request has failed as reported by the device driver. */ DEFINE_EVENT(block_rq_completion, block_rq_error, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm) ); /** * block_rq_insert - insert block operation request into queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted * into queue @q. The fields in the operation request @rq struct can * be examined to determine which device and sectors the pending * operation would access. */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver * @rq: block IO operation request * * Called when block operation request @rq from queue @q is sent to a * device driver for processing. */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator * @rq: block IO operation request * * Called when block operation request @rq from queue @q is merged to another * request queued in the elevator. */ DEFINE_EVENT(block_rq, block_rq_merge, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_start - insert a request for execution * @rq: block IO operation request * * Called when block operation request @rq is queued for execution */ DEFINE_EVENT(block_rq, block_io_start, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_done - block IO operation request completed * @rq: block IO operation request * * Called when block operation request @rq is completed */ DEFINE_EVENT(block_rq, block_io_done, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. */ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct bio *bio), TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_backmerge - merging block operation to the end of an existing operation * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ DEFINE_EVENT(block_bio, block_bio_queue, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations * @bio: pending block IO operation (can be %NULL) * * A request struct has been allocated to handle the block IO operation @bio. */ DEFINE_EVENT(block_bio, block_getrq, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * blk_zone_append_update_request_bio - update bio sector after zone append * @rq: the completed request that sets the bio sector * * Update the bio's bi_sector after a zone append command has been completed. */ DEFINE_EVENT(block_rq, blk_zone_append_update_request_bio, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_plug - keep operations requests in request queue * @q: request queue to plug * * Plug the request queue @q. Do not allow block operation requests * to be sent to the device driver. Instead, accumulate requests in * the queue to improve throughput performance of the block device. */ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), TP_ARGS(q), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s]", __entry->comm) ); DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); /** * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit) ); /** * block_split - split a single bio struct into two bio structs * @bio: block operation being split * @new_sector: The starting sector for the new bio * * The bio request @bio needs to be split into two bio requests. The newly * created @bio request starts at @new_sector. This split may be required due to * hardware limitations such as operation crossing device boundaries in a RAID * system. */ TRACE_EVENT(block_split, TP_PROTO(struct bio *bio, unsigned int new_sector), TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, (unsigned long long)__entry->new_sector, __entry->comm) ); /** * block_bio_remap - map request for a logical device to the raw device * @bio: revised operation * @dev: original device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the * raw block device. */ TRACE_EVENT(block_bio_remap, TP_PROTO(struct bio *bio, dev_t dev, sector_t from), TP_ARGS(bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector) ); /** * block_rq_remap - map request for a block operation request * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation * * The block operation request @rq in @q has been remapped. The block * operation request @rq holds the current information and @from hold * the original sector. */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request *rq, dev_t dev, sector_t from), TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = disk_devt(rq->q->disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector, __entry->nr_bios) ); /** * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bio: The block IO operation sent down to the device * @nr_sectors: The number of sectors affected by this operation * * Execute a zone management operation on a specified range of zones. This * range is encoded in %nr_sectors, which has to be a multiple of the zone * size. */ TRACE_EVENT(blkdev_zone_mgmt, TP_PROTO(struct bio *bio, sector_t nr_sectors), TP_ARGS(bio, nr_sectors), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, nr_sectors ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sectors = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sectors) ); DECLARE_EVENT_CLASS(block_zwplug, TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, unsigned int nr_sectors), TP_ARGS(q, zno, sector, nr_sectors), TP_STRUCT__entry( __field( dev_t, dev ) __field( unsigned int, zno ) __field( sector_t, sector ) __field( unsigned int, nr_sectors ) ), TP_fast_assign( __entry->dev = disk_devt(q->disk); __entry->zno = zno; __entry->sector = sector; __entry->nr_sectors = nr_sectors; ), TP_printk("%d,%d zone %u, BIO %llu + %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->zno, (unsigned long long)__entry->sector, __entry->nr_sectors) ); DEFINE_EVENT(block_zwplug, disk_zone_wplug_add_bio, TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, unsigned int nr_sectors), TP_ARGS(q, zno, sector, nr_sectors) ); DEFINE_EVENT(block_zwplug, blk_zone_wplug_bio, TP_PROTO(struct request_queue *q, unsigned int zno, sector_t sector, unsigned int nr_sectors), TP_ARGS(q, zno, sector, nr_sectors) ); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
183 1 183 182 141 156 548 142 547 87 16 24 63 176 138 58 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/atomic.h> #include <linux/inetdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_nat_masquerade.h> struct masq_dev_work { struct work_struct work; struct net *net; netns_tracker ns_tracker; union nf_inet_addr addr; int ifindex; int (*iter)(struct nf_conn *i, void *data); }; #define MAX_MASQ_WORKER_COUNT 16 static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt __read_mostly; static atomic_t masq_worker_count __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct nf_nat_range2 *range, const struct net_device *out) { struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; struct nf_nat_range2 newrange; const struct rtable *rt; __be32 newsrc, nh; WARN_ON(hooknum != NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; rt = skb_rtable(skb); nh = rt_nexthop(rt, ip_hdr(skb)->daddr); newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { pr_info("%s ate my IP address\n", out->name); return NF_DROP; } nat = nf_ct_nat_ext_add(ct); if (nat) nat->masq_index = out->ifindex; /* Transfer from original range. */ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.ip = newsrc; newrange.max_addr.ip = newsrc; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); static void iterate_cleanup_work(struct work_struct *work) { struct nf_ct_iter_data iter_data = {}; struct masq_dev_work *w; w = container_of(work, struct masq_dev_work, work); iter_data.net = w->net; iter_data.data = (void *)w; nf_ct_iterate_cleanup_net(w->iter, &iter_data); put_net_track(w->net, &w->ns_tracker); kfree(w); atomic_dec(&masq_worker_count); module_put(THIS_MODULE); } /* Iterate conntrack table in the background and remove conntrack entries * that use the device/address being removed. * * In case too many work items have been queued already or memory allocation * fails iteration is skipped, conntrack entries will time out eventually. */ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, int ifindex, int (*iter)(struct nf_conn *i, void *data), gfp_t gfp_flags) { struct masq_dev_work *w; if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) return; net = maybe_get_net(net); if (!net) return; if (!try_module_get(THIS_MODULE)) goto err_module; w = kzalloc(sizeof(*w), gfp_flags); if (w) { /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ atomic_inc(&masq_worker_count); INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = ifindex; w->net = net; netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); w->iter = iter; if (addr) w->addr = *addr; schedule_work(&w->work); return; } module_put(THIS_MODULE); err_module: put_net(net); } static int device_cmp(struct nf_conn *i, void *arg) { const struct nf_conn_nat *nat = nfct_nat(i); const struct masq_dev_work *w = arg; if (!nat) return 0; return nat->masq_index == w->ifindex; } static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for * conntracks which were associated with that device, * and forget them. */ nf_nat_masq_schedule(net, NULL, dev->ifindex, device_cmp, GFP_KERNEL); } return NOTIFY_DONE; } static int inet_cmp(struct nf_conn *ct, void *ptr) { struct nf_conntrack_tuple *tuple; struct masq_dev_work *w = ptr; if (!device_cmp(ct, ptr)) return 0; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); } static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { const struct in_ifaddr *ifa = ptr; const struct in_device *idev; const struct net_device *dev; union nf_inet_addr addr; if (event != NETDEV_DOWN) return NOTIFY_DONE; /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have * no work to do. Otherwise this is an individual address removal * and we have to perform the flush. */ idev = ifa->ifa_dev; if (idev->dead) return NOTIFY_DONE; memset(&addr, 0, sizeof(addr)); addr.ip = ifa->ifa_address; dev = idev->dev; nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, inet_cmp, GFP_KERNEL); return NOTIFY_DONE; } static struct notifier_block masq_dev_notifier = { .notifier_call = masq_device_event, }; static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; #if IS_ENABLED(CONFIG_IPV6) static int nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr) { #ifdef CONFIG_IPV6_MODULE const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return -EHOSTUNREACH; return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); #else return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); #endif } unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) { enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; struct in6_addr src; struct nf_conn *ct; struct nf_nat_range2 newrange; ct = nf_ct_get(skb, &ctinfo); WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); if (nat) nat->masq_index = out->ifindex; newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.in6 = src; newrange.max_addr.in6 = src; newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). * * Defer it to the system workqueue. * * As we can have 'a lot' of inet_events (depending on amount of ipv6 * addresses being deleted), we also need to limit work item queue. */ static int masq_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; const struct net_device *dev; union nf_inet_addr addr; if (event != NETDEV_DOWN) return NOTIFY_DONE; dev = ifa->idev->dev; memset(&addr, 0, sizeof(addr)); addr.in6 = ifa->addr; nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, GFP_ATOMIC); return NOTIFY_DONE; } static struct notifier_block masq_inet6_notifier = { .notifier_call = masq_inet6_event, }; static int nf_nat_masquerade_ipv6_register_notifier(void) { return register_inet6addr_notifier(&masq_inet6_notifier); } #else static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } #endif int nf_nat_masquerade_inet_register_notifiers(void) { int ret = 0; mutex_lock(&masq_mutex); if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { ret = -EOVERFLOW; goto out_unlock; } /* check if the notifier was already set */ if (++masq_refcnt > 1) goto out_unlock; /* Register for device down reports */ ret = register_netdevice_notifier(&masq_dev_notifier); if (ret) goto err_dec; /* Register IP address change reports */ ret = register_inetaddr_notifier(&masq_inet_notifier); if (ret) goto err_unregister; ret = nf_nat_masquerade_ipv6_register_notifier(); if (ret) goto err_unreg_inet; mutex_unlock(&masq_mutex); return ret; err_unreg_inet: unregister_inetaddr_notifier(&masq_inet_notifier); err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: masq_refcnt--; out_unlock: mutex_unlock(&masq_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); void nf_nat_masquerade_inet_unregister_notifiers(void) { mutex_lock(&masq_mutex); /* check if the notifiers still have clients */ if (--masq_refcnt > 0) goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); unregister_inetaddr_notifier(&masq_inet_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&masq_inet6_notifier); #endif out_unlock: mutex_unlock(&masq_mutex); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
293 293 294 63 64 64 65 56 49 242 280 281 281 4 1 4 4 1 3 4 4 7 7 7 1 7 7 3 4 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 // SPDX-License-Identifier: GPL-2.0-or-later /* V4L2 device support. Copyright (C) 2008 Hans Verkuil <hverkuil@kernel.org> */ #include <linux/types.h> #include <linux/ioctl.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev) { if (v4l2_dev == NULL) return -EINVAL; INIT_LIST_HEAD(&v4l2_dev->subdevs); spin_lock_init(&v4l2_dev->lock); v4l2_prio_init(&v4l2_dev->prio); kref_init(&v4l2_dev->ref); get_device(dev); v4l2_dev->dev = dev; if (dev == NULL) { /* If dev == NULL, then name must be filled in by the caller */ if (WARN_ON(!v4l2_dev->name[0])) return -EINVAL; return 0; } /* Set name to driver name + device name if it is empty. */ if (!v4l2_dev->name[0]) snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s %s", dev->driver->name, dev_name(dev)); if (!dev_get_drvdata(dev)) dev_set_drvdata(dev, v4l2_dev); return 0; } EXPORT_SYMBOL_GPL(v4l2_device_register); static void v4l2_device_release(struct kref *ref) { struct v4l2_device *v4l2_dev = container_of(ref, struct v4l2_device, ref); if (v4l2_dev->release) v4l2_dev->release(v4l2_dev); } int v4l2_device_put(struct v4l2_device *v4l2_dev) { return kref_put(&v4l2_dev->ref, v4l2_device_release); } EXPORT_SYMBOL_GPL(v4l2_device_put); int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename, atomic_t *instance) { int num = atomic_inc_return(instance) - 1; int len = strlen(basename); if (basename[len - 1] >= '0' && basename[len - 1] <= '9') snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s-%d", basename, num); else snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s%d", basename, num); return num; } EXPORT_SYMBOL_GPL(v4l2_device_set_name); void v4l2_device_disconnect(struct v4l2_device *v4l2_dev) { if (v4l2_dev->dev == NULL) return; if (dev_get_drvdata(v4l2_dev->dev) == v4l2_dev) dev_set_drvdata(v4l2_dev->dev, NULL); put_device(v4l2_dev->dev); v4l2_dev->dev = NULL; } EXPORT_SYMBOL_GPL(v4l2_device_disconnect); void v4l2_device_unregister(struct v4l2_device *v4l2_dev) { struct v4l2_subdev *sd, *next; /* Just return if v4l2_dev is NULL or if it was already * unregistered before. */ if (v4l2_dev == NULL || !v4l2_dev->name[0]) return; v4l2_device_disconnect(v4l2_dev); /* Unregister subdevs */ list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) { v4l2_device_unregister_subdev(sd); if (sd->flags & V4L2_SUBDEV_FL_IS_I2C) v4l2_i2c_subdev_unregister(sd); else if (sd->flags & V4L2_SUBDEV_FL_IS_SPI) v4l2_spi_subdev_unregister(sd); } /* Mark as unregistered, thus preventing duplicate unregistrations */ v4l2_dev->name[0] = '\0'; } EXPORT_SYMBOL_GPL(v4l2_device_unregister); int __v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, struct v4l2_subdev *sd, struct module *module) { int err; /* Check for valid input */ if (!v4l2_dev || !sd || sd->v4l2_dev || !sd->name[0]) return -EINVAL; /* * The reason to acquire the module here is to avoid unloading * a module of sub-device which is registered to a media * device. To make it possible to unload modules for media * devices that also register sub-devices, do not * try_module_get() such sub-device owners. */ sd->owner_v4l2_dev = v4l2_dev->dev && v4l2_dev->dev->driver && module == v4l2_dev->dev->driver->owner; if (!sd->owner_v4l2_dev && !try_module_get(module)) return -ENODEV; sd->v4l2_dev = v4l2_dev; /* This just returns 0 if either of the two args is NULL */ err = v4l2_ctrl_add_handler(v4l2_dev->ctrl_handler, sd->ctrl_handler, NULL, true); if (err) goto error_module; #if defined(CONFIG_MEDIA_CONTROLLER) /* Register the entity. */ if (v4l2_dev->mdev) { err = media_device_register_entity(v4l2_dev->mdev, &sd->entity); if (err < 0) goto error_module; } #endif if (sd->internal_ops && sd->internal_ops->registered) { err = sd->internal_ops->registered(sd); if (err) goto error_unregister; } sd->owner = module; spin_lock(&v4l2_dev->lock); list_add_tail(&sd->list, &v4l2_dev->subdevs); spin_unlock(&v4l2_dev->lock); return 0; error_unregister: #if defined(CONFIG_MEDIA_CONTROLLER) media_device_unregister_entity(&sd->entity); #endif error_module: if (!sd->owner_v4l2_dev) module_put(sd->owner); sd->v4l2_dev = NULL; return err; } EXPORT_SYMBOL_GPL(__v4l2_device_register_subdev); static void v4l2_subdev_release(struct v4l2_subdev *sd) { struct module *owner = !sd->owner_v4l2_dev ? sd->owner : NULL; if (sd->internal_ops && sd->internal_ops->release) sd->internal_ops->release(sd); sd->devnode = NULL; module_put(owner); } static void v4l2_device_release_subdev_node(struct video_device *vdev) { v4l2_subdev_release(video_get_drvdata(vdev)); kfree(vdev); } int __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only) { struct video_device *vdev; struct v4l2_subdev *sd; int err; /* Register a device node for every subdev marked with the * V4L2_SUBDEV_FL_HAS_DEVNODE flag. */ list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!(sd->flags & V4L2_SUBDEV_FL_HAS_DEVNODE)) continue; if (sd->devnode) continue; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { err = -ENOMEM; goto clean_up; } video_set_drvdata(vdev, sd); strscpy(vdev->name, sd->name, sizeof(vdev->name)); vdev->dev_parent = sd->dev; vdev->v4l2_dev = v4l2_dev; vdev->fops = &v4l2_subdev_fops; vdev->release = v4l2_device_release_subdev_node; vdev->ctrl_handler = sd->ctrl_handler; if (read_only) set_bit(V4L2_FL_SUBDEV_RO_DEVNODE, &vdev->flags); sd->devnode = vdev; err = __video_register_device(vdev, VFL_TYPE_SUBDEV, -1, 1, sd->owner); if (err < 0) { sd->devnode = NULL; kfree(vdev); goto clean_up; } #if defined(CONFIG_MEDIA_CONTROLLER) sd->entity.info.dev.major = VIDEO_MAJOR; sd->entity.info.dev.minor = vdev->minor; /* Interface is created by __video_register_device() */ if (vdev->v4l2_dev->mdev) { struct media_link *link; link = media_create_intf_link(&sd->entity, &vdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) { err = -ENOMEM; goto clean_up; } } #endif } return 0; clean_up: list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!sd->devnode) break; video_unregister_device(sd->devnode); } return err; } EXPORT_SYMBOL_GPL(__v4l2_device_register_subdev_nodes); void v4l2_device_unregister_subdev(struct v4l2_subdev *sd) { struct v4l2_device *v4l2_dev; /* return if it isn't registered */ if (sd == NULL || sd->v4l2_dev == NULL) return; v4l2_dev = sd->v4l2_dev; spin_lock(&v4l2_dev->lock); list_del(&sd->list); spin_unlock(&v4l2_dev->lock); if (sd->internal_ops && sd->internal_ops->unregistered) sd->internal_ops->unregistered(sd); sd->v4l2_dev = NULL; #if defined(CONFIG_MEDIA_CONTROLLER) if (v4l2_dev->mdev) { /* * No need to explicitly remove links, as both pads and * links are removed by the function below, in the right order */ media_device_unregister_entity(&sd->entity); } #endif if (sd->devnode) video_unregister_device(sd->devnode); else v4l2_subdev_release(sd); } EXPORT_SYMBOL_GPL(v4l2_device_unregister_subdev);
2 3 1850 30 30 29 132 29 132 55 55 14 14 29 6 35 40 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 // SPDX-License-Identifier: GPL-2.0-only /* * fs/anon_inodes.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * * Thanks to Arnd Bergmann for code review and suggestions. * More changes for Thomas Gleixner suggestions. * */ #include <linux/cred.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/magic.h> #include <linux/anon_inodes.h> #include <linux/pseudo_fs.h> #include <linux/uaccess.h> #include "internal.h" static struct vfsmount *anon_inode_mnt __ro_after_init; static struct inode *anon_inode_inode __ro_after_init; /* * User space expects anonymous inodes to have no file type in st_mode. * * In particular, 'lsof' has this legacy logic: * * type = s->st_mode & S_IFMT; * switch (type) { * ... * case 0: * if (!strcmp(p, "anon_inode")) * Lf->ntype = Ntype = N_ANON_INODE; * * to detect our old anon_inode logic. * * Rather than mess with our internal sane inode data, just fix it * up here in getattr() by masking off the format bits. */ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->mode &= ~S_IFMT; return 0; } int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } static const struct inode_operations anon_inode_operations = { .getattr = anon_inode_getattr, .setattr = anon_inode_setattr, }; /* * anon_inodefs_dname() is called from d_path(). */ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(buffer, buflen, "anon_inode:%s", dentry->d_name.name); } static const struct dentry_operations anon_inodefs_dentry_operations = { .d_dname = anon_inodefs_dname, }; static int anon_inodefs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC); if (!ctx) return -ENOMEM; fc->s_iflags |= SB_I_NOEXEC; fc->s_iflags |= SB_I_NODEV; ctx->dops = &anon_inodefs_dentry_operations; return 0; } static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", .init_fs_context = anon_inodefs_init_fs_context, .kill_sb = kill_anon_super, }; /** * anon_inode_make_secure_inode - allocate an anonymous inode with security context * @sb: [in] Superblock to allocate from * @name: [in] Name of the class of the newfile (e.g., "secretmem") * @context_inode: * [in] Optional parent inode for security inheritance * * The function ensures proper security initialization through the LSM hook * security_inode_init_security_anon(). * * Return: Pointer to new inode on success, ERR_PTR on failure. */ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, const struct inode *context_inode) { struct inode *inode; int error; inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; inode->i_op = &anon_inode_operations; error = security_inode_init_security_anon(inode, &QSTR(name), context_inode); if (error) { iput(inode); return ERR_PTR(error); } return inode; } EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode, bool make_inode) { struct inode *inode; struct file *file; if (fops->owner && !try_module_get(fops->owner)) return ERR_PTR(-ENOENT); if (make_inode) { inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; } } else { inode = anon_inode_inode; if (IS_ERR(inode)) { file = ERR_PTR(-ENODEV); goto err; } /* * We know the anon_inode inode count is always * greater than zero, so ihold() is safe. */ ihold(inode); } file = alloc_file_pseudo(inode, anon_inode_mnt, name, flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) goto err_iput; file->f_mapping = inode->i_mapping; file->private_data = priv; return file; err_iput: iput(inode); err: module_put(fops->owner); return file; } /** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file * * @name: [in] name of the "class" of the new file * @fops: [in] file operations for the new file * @priv: [in] private data for the new file (will be file's private_data) * @flags: [in] flags * * Creates a new file by hooking it on a single inode. This is useful for files * that do not need to have a full-fledged inode in order to operate correctly. * All the files created with anon_inode_getfile() will share a single inode, * hence saving memory and avoiding code duplication for the file/inode/dentry * setup. Returns the newly created file* or an error pointer. */ struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags) { return __anon_inode_getfile(name, fops, priv, flags, NULL, false); } EXPORT_SYMBOL_GPL(anon_inode_getfile); /** * anon_inode_getfile_fmode - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file * * @name: [in] name of the "class" of the new file * @fops: [in] file operations for the new file * @priv: [in] private data for the new file (will be file's private_data) * @flags: [in] flags * @f_mode: [in] fmode * * Creates a new file by hooking it on a single inode. This is useful for files * that do not need to have a full-fledged inode in order to operate correctly. * All the files created with anon_inode_getfile() will share a single inode, * hence saving memory and avoiding code duplication for the file/inode/dentry * setup. Allows setting the fmode. Returns the newly created file* or an error * pointer. */ struct file *anon_inode_getfile_fmode(const char *name, const struct file_operations *fops, void *priv, int flags, fmode_t f_mode) { struct file *file; file = __anon_inode_getfile(name, fops, priv, flags, NULL, false); if (!IS_ERR(file)) file->f_mode |= f_mode; return file; } EXPORT_SYMBOL_GPL(anon_inode_getfile_fmode); /** * anon_inode_create_getfile - Like anon_inode_getfile(), but creates a new * !S_PRIVATE anon inode rather than reuse the * singleton anon inode and calls the * inode_init_security_anon() LSM hook. * * @name: [in] name of the "class" of the new file * @fops: [in] file operations for the new file * @priv: [in] private data for the new file (will be file's private_data) * @flags: [in] flags * @context_inode: * [in] the logical relationship with the new inode (optional) * * Create a new anonymous inode and file pair. This can be done for two * reasons: * * - for the inode to have its own security context, so that LSMs can enforce * policy on the inode's creation; * * - if the caller needs a unique inode, for example in order to customize * the size returned by fstat() * * The LSM may use @context_inode in inode_init_security_anon(), but a * reference to it is not held. * * Returns the newly created file* or an error pointer. */ struct file *anon_inode_create_getfile(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode) { return __anon_inode_getfile(name, fops, priv, flags, context_inode, true); } EXPORT_SYMBOL_GPL(anon_inode_create_getfile); static int __anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode, bool make_inode) { int error, fd; struct file *file; error = get_unused_fd_flags(flags); if (error < 0) return error; fd = error; file = __anon_inode_getfile(name, fops, priv, flags, context_inode, make_inode); if (IS_ERR(file)) { error = PTR_ERR(file); goto err_put_unused_fd; } fd_install(fd, file); return fd; err_put_unused_fd: put_unused_fd(fd); return error; } /** * anon_inode_getfd - creates a new file instance by hooking it up to * an anonymous inode and a dentry that describe * the "class" of the file * * @name: [in] name of the "class" of the new file * @fops: [in] file operations for the new file * @priv: [in] private data for the new file (will be file's private_data) * @flags: [in] flags * * Creates a new file by hooking it on a single inode. This is * useful for files that do not need to have a full-fledged inode in * order to operate correctly. All the files created with * anon_inode_getfd() will use the same singleton inode, reducing * memory use and avoiding code duplication for the file/inode/dentry * setup. Returns a newly created file descriptor or an error code. */ int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags) { return __anon_inode_getfd(name, fops, priv, flags, NULL, false); } EXPORT_SYMBOL_GPL(anon_inode_getfd); /** * anon_inode_create_getfd - Like anon_inode_getfd(), but creates a new * !S_PRIVATE anon inode rather than reuse the singleton anon inode, and calls * the inode_init_security_anon() LSM hook. * * @name: [in] name of the "class" of the new file * @fops: [in] file operations for the new file * @priv: [in] private data for the new file (will be file's private_data) * @flags: [in] flags * @context_inode: * [in] the logical relationship with the new inode (optional) * * Create a new anonymous inode and file pair. This can be done for two * reasons: * * - for the inode to have its own security context, so that LSMs can enforce * policy on the inode's creation; * * - if the caller needs a unique inode, for example in order to customize * the size returned by fstat() * * The LSM may use @context_inode in inode_init_security_anon(), but a * reference to it is not held. * * Returns a newly created file descriptor or an error code. */ int anon_inode_create_getfd(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode) { return __anon_inode_getfd(name, fops, priv, flags, context_inode, true); } static int __init anon_inode_init(void) { anon_inode_mnt = kern_mount(&anon_inode_fs_type); if (IS_ERR(anon_inode_mnt)) panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); if (IS_ERR(anon_inode_inode)) panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); anon_inode_inode->i_op = &anon_inode_operations; return 0; } fs_initcall(anon_inode_init);
1 1 1 1 1 1 1 1 1 1 1 1 6 1 2 1 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 /* * Davicom DM96xx USB 10/100Mbps ethernet devices * * Peter Korsgaard <jacmet@sunsite.dk> * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied. */ //#define DEBUG #include <linux/module.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> /* datasheet: http://ptm2.cc.utu.fi/ftp/network/cards/DM9601/From_NET/DM9601-DS-P01-930914.pdf */ /* control requests */ #define DM_READ_REGS 0x00 #define DM_WRITE_REGS 0x01 #define DM_READ_MEMS 0x02 #define DM_WRITE_REG 0x03 #define DM_WRITE_MEMS 0x05 #define DM_WRITE_MEM 0x07 /* registers */ #define DM_NET_CTRL 0x00 #define DM_RX_CTRL 0x05 #define DM_SHARED_CTRL 0x0b #define DM_SHARED_ADDR 0x0c #define DM_SHARED_DATA 0x0d /* low + high */ #define DM_PHY_ADDR 0x10 /* 6 bytes */ #define DM_MCAST_ADDR 0x16 /* 8 bytes */ #define DM_GPR_CTRL 0x1e #define DM_GPR_DATA 0x1f #define DM_CHIP_ID 0x2c #define DM_MODE_CTRL 0x91 /* only on dm9620 */ /* chip id values */ #define ID_DM9601 0 #define ID_DM9620 1 #define DM_MAX_MCAST 64 #define DM_MCAST_SIZE 8 #define DM_EEPROM_LEN 256 #define DM_TX_OVERHEAD 2 /* 2 byte header */ #define DM_RX_OVERHEAD 7 /* 3 byte header + 4 byte crc tail */ #define DM_TIMEOUT 1000 static int dm_read(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_read_cmd(dev, DM_READ_REGS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); if(err != length && err >= 0) err = -EINVAL; return err; } static int dm_read_reg(struct usbnet *dev, u8 reg, u8 *value) { return dm_read(dev, reg, 1, value); } static int dm_write(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_write_cmd(dev, DM_WRITE_REGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); if (err >= 0 && err < length) err = -EINVAL; return err; } static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value) { return usbnet_write_cmd(dev, DM_WRITE_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, reg, NULL, 0); } static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, const void *data) { usbnet_write_cmd_async(dev, DM_WRITE_REGS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, reg, data, length); } static void dm_write_reg_async(struct usbnet *dev, u8 reg, u8 value) { usbnet_write_cmd_async(dev, DM_WRITE_REG, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, reg, NULL, 0); } static int dm_read_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 *value) { int ret, i; mutex_lock(&dev->phy_mutex); dm_write_reg(dev, DM_SHARED_ADDR, phy ? (reg | 0x40) : reg); dm_write_reg(dev, DM_SHARED_CTRL, phy ? 0xc : 0x4); for (i = 0; i < DM_TIMEOUT; i++) { u8 tmp = 0; udelay(1); ret = dm_read_reg(dev, DM_SHARED_CTRL, &tmp); if (ret < 0) goto out; /* ready */ if ((tmp & 1) == 0) break; } if (i == DM_TIMEOUT) { netdev_err(dev->net, "%s read timed out!\n", phy ? "phy" : "eeprom"); ret = -EIO; goto out; } dm_write_reg(dev, DM_SHARED_CTRL, 0x0); ret = dm_read(dev, DM_SHARED_DATA, 2, value); netdev_dbg(dev->net, "read shared %d 0x%02x returned 0x%04x, %d\n", phy, reg, *value, ret); out: mutex_unlock(&dev->phy_mutex); return ret; } static int dm_write_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 value) { int ret, i; mutex_lock(&dev->phy_mutex); ret = dm_write(dev, DM_SHARED_DATA, 2, &value); if (ret < 0) goto out; dm_write_reg(dev, DM_SHARED_ADDR, phy ? (reg | 0x40) : reg); dm_write_reg(dev, DM_SHARED_CTRL, phy ? 0x1a : 0x12); for (i = 0; i < DM_TIMEOUT; i++) { u8 tmp = 0; udelay(1); ret = dm_read_reg(dev, DM_SHARED_CTRL, &tmp); if (ret < 0) goto out; /* ready */ if ((tmp & 1) == 0) break; } if (i == DM_TIMEOUT) { netdev_err(dev->net, "%s write timed out!\n", phy ? "phy" : "eeprom"); ret = -EIO; goto out; } dm_write_reg(dev, DM_SHARED_CTRL, 0x0); out: mutex_unlock(&dev->phy_mutex); return ret; } static int dm_read_eeprom_word(struct usbnet *dev, u8 offset, void *value) { return dm_read_shared_word(dev, 0, offset, value); } static int dm9601_get_eeprom_len(struct net_device *dev) { return DM_EEPROM_LEN; } static int dm9601_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 * data) { struct usbnet *dev = netdev_priv(net); __le16 *ebuf = (__le16 *) data; int i; /* access is 16bit */ if ((eeprom->offset % 2) || (eeprom->len % 2)) return -EINVAL; for (i = 0; i < eeprom->len / 2; i++) { if (dm_read_eeprom_word(dev, eeprom->offset / 2 + i, &ebuf[i]) < 0) return -EINVAL; } return 0; } static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; int err; if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return 0; } err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); return 0; } netdev_dbg(dev->net, "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", phy_id, loc, le16_to_cpu(res)); return le16_to_cpu(res); } static void dm9601_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return; } netdev_dbg(dev->net, "dm9601_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); dm_write_shared_word(dev, 1, loc, res); } static void dm9601_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); } static u32 dm9601_get_link(struct net_device *net) { struct usbnet *dev = netdev_priv(net); return mii_link_ok(&dev->mii); } static int dm9601_ioctl(struct net_device *net, struct ifreq *rq, int cmd) { struct usbnet *dev = netdev_priv(net); return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); } static const struct ethtool_ops dm9601_ethtool_ops = { .get_drvinfo = dm9601_get_drvinfo, .get_link = dm9601_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_eeprom_len = dm9601_get_eeprom_len, .get_eeprom = dm9601_get_eeprom, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static void dm9601_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); /* We use the 20 byte dev->data for our 8 byte filter buffer * to avoid allocating memory that is tricky to free later */ u8 *hashes = (u8 *) & dev->data; u8 rx_ctl = 0x31; memset(hashes, 0x00, DM_MCAST_SIZE); hashes[DM_MCAST_SIZE - 1] |= 0x80; /* broadcast address */ if (net->flags & IFF_PROMISC) { rx_ctl |= 0x02; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > DM_MAX_MCAST) { rx_ctl |= 0x08; } else if (!netdev_mc_empty(net)) { struct netdev_hw_addr *ha; netdev_for_each_mc_addr(ha, net) { u32 crc = ether_crc(ETH_ALEN, ha->addr) >> 26; hashes[crc >> 3] |= 1 << (crc & 0x7); } } dm_write_async(dev, DM_MCAST_ADDR, DM_MCAST_SIZE, hashes); dm_write_reg_async(dev, DM_RX_CTRL, rx_ctl); } static void __dm9601_set_mac_address(struct usbnet *dev) { dm_write_async(dev, DM_PHY_ADDR, ETH_ALEN, dev->net->dev_addr); } static int dm9601_set_mac_address(struct net_device *net, void *p) { struct sockaddr *addr = p; struct usbnet *dev = netdev_priv(net); if (!is_valid_ether_addr(addr->sa_data)) { dev_err(&net->dev, "not setting invalid mac address %pM\n", addr->sa_data); return -EINVAL; } eth_hw_addr_set(net, addr->sa_data); __dm9601_set_mac_address(dev); return 0; } static const struct net_device_ops dm9601_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = dm9601_ioctl, .ndo_set_rx_mode = dm9601_set_multicast, .ndo_set_mac_address = dm9601_set_mac_address, }; static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) { int ret; u8 mac[ETH_ALEN], id; ret = usbnet_get_endpoints(dev, intf); if (ret) goto out; dev->net->netdev_ops = &dm9601_netdev_ops; dev->net->ethtool_ops = &dm9601_ethtool_ops; dev->net->hard_header_len += DM_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; /* dm9620/21a require room for 4 byte padding, even in dm9601 * mode, so we need +1 to be able to receive full size * ethernet frames. */ dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD + 1; dev->mii.dev = dev->net; dev->mii.mdio_read = dm9601_mdio_read; dev->mii.mdio_write = dm9601_mdio_write; dev->mii.phy_id_mask = 0x1f; dev->mii.reg_num_mask = 0x1f; /* reset */ dm_write_reg(dev, DM_NET_CTRL, 1); udelay(20); /* read MAC */ if (dm_read(dev, DM_PHY_ADDR, ETH_ALEN, mac) < 0) { printk(KERN_ERR "Error reading MAC address\n"); ret = -ENODEV; goto out; } /* * Overwrite the auto-generated address only with good ones. */ if (is_valid_ether_addr(mac)) eth_hw_addr_set(dev->net, mac); else { printk(KERN_WARNING "dm9601: No valid MAC address in EEPROM, using %pM\n", dev->net->dev_addr); __dm9601_set_mac_address(dev); } if (dm_read_reg(dev, DM_CHIP_ID, &id) < 0) { netdev_err(dev->net, "Error reading chip ID\n"); ret = -ENODEV; goto out; } /* put dm9620 devices in dm9601 mode */ if (id == ID_DM9620) { u8 mode; if (dm_read_reg(dev, DM_MODE_CTRL, &mode) < 0) { netdev_err(dev->net, "Error reading MODE_CTRL\n"); ret = -ENODEV; goto out; } dm_write_reg(dev, DM_MODE_CTRL, mode & 0x7f); } /* power up phy */ dm_write_reg(dev, DM_GPR_CTRL, 1); dm_write_reg(dev, DM_GPR_DATA, 0); /* receive broadcast packets */ dm9601_set_multicast(dev->net); dm9601_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); dm9601_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); mii_nway_restart(&dev->mii); out: return ret; } static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; int len; /* format: b1: rx status b2: packet length (incl crc) low b3: packet length (incl crc) high b4..n-4: packet data bn-3..bn: ethernet crc */ if (unlikely(skb->len < DM_RX_OVERHEAD)) { dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); return 0; } status = skb->data[0]; len = (skb->data[1] | (skb->data[2] << 8)) - 4; if (unlikely(status & 0xbf)) { if (status & 0x01) dev->net->stats.rx_fifo_errors++; if (status & 0x02) dev->net->stats.rx_crc_errors++; if (status & 0x04) dev->net->stats.rx_frame_errors++; if (status & 0x20) dev->net->stats.rx_missed_errors++; if (status & 0x90) dev->net->stats.rx_length_errors++; return 0; } skb_pull(skb, 3); skb_trim(skb, len); return 1; } static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int len, pad; /* format: b1: packet length low b2: packet length high b3..n: packet data */ len = skb->len + DM_TX_OVERHEAD; /* workaround for dm962x errata with tx fifo getting out of * sync if a USB bulk transfer retry happens right after a * packet with odd / maxpacket length by adding up to 3 bytes * padding. */ while ((len & 1) || !(len % dev->maxpacket)) len++; len -= DM_TX_OVERHEAD; /* hw header doesn't count as part of length */ pad = len - skb->len; if (skb_headroom(skb) < DM_TX_OVERHEAD || skb_tailroom(skb) < pad) { struct sk_buff *skb2; skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, pad, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) return NULL; } __skb_push(skb, DM_TX_OVERHEAD); if (pad) { memset(skb->data + skb->len, 0, pad); __skb_put(skb, pad); } skb->data[0] = len; skb->data[1] = len >> 8; return skb; } static void dm9601_status(struct usbnet *dev, struct urb *urb) { int link; u8 *buf; /* format: b0: net status b1: tx status 1 b2: tx status 2 b3: rx status b4: rx overflow b5: rx count b6: tx count b7: gpr */ if (urb->actual_length < 8) return; buf = urb->transfer_buffer; link = !!(buf[0] & 0x40); if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } static int dm9601_link_reset(struct usbnet *dev) { struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); netdev_dbg(dev->net, "link_reset() speed: %u duplex: %d\n", ethtool_cmd_speed(&ecmd), ecmd.duplex); return 0; } static const struct driver_info dm9601_info = { .description = "Davicom DM96xx USB 10/100 Ethernet", .flags = FLAG_ETHER | FLAG_LINK_INTR, .bind = dm9601_bind, .rx_fixup = dm9601_rx_fixup, .tx_fixup = dm9601_tx_fixup, .status = dm9601_status, .link_reset = dm9601_link_reset, .reset = dm9601_link_reset, }; static const struct usb_device_id products[] = { { USB_DEVICE(0x07aa, 0x9601), /* Corega FEther USB-TXC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9601), /* Davicom USB-100 */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x6688), /* ZT6688 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x0268), /* ShanTou ST268 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x8515), /* ADMtek ADM8515 USB NIC */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a47, 0x9601), /* Hirose USB-100 */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0fe6, 0x8101), /* DM9601 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9621), /* DM9621A USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x9622), /* DM9622 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x0269), /* DM962OA USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0a46, 0x1269), /* DM9621A USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, { USB_DEVICE(0x0586, 0x3427), /* ZyXEL Keenetic Plus DSL xDSL modem */ .driver_info = (unsigned long)&dm9601_info, }, {}, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver dm9601_driver = { .name = "dm9601", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(dm9601_driver); MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>"); MODULE_DESCRIPTION("Davicom DM96xx USB 10/100 ethernet devices"); MODULE_LICENSE("GPL");
40 40 47 49 49 49 49 49 49 45 43 45 1 47 46 1 48 48 48 45 1 1 46 48 48 45 48 48 4 48 48 49 49 49 1 47 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 // SPDX-License-Identifier: GPL-2.0 /* * fs/mpage.c * * Copyright (C) 2002, Linus Torvalds. * * Contains functions related to preparing and submitting BIOs which contain * multiple pagecache pages. * * 15May2002 Andrew Morton * Initial version * 27Jun2002 axboe@suse.de * use bio_add_page() to build bio's just the right size */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/kdev_t.h> #include <linux/gfp.h> #include <linux/bio.h> #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/highmem.h> #include <linux/prefetch.h> #include <linux/mpage.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> #include "internal.h" /* * I/O completion handler for multipage BIOs. * * The mpage code never puts partial pages into a BIO (except for end-of-file). * If a page does not map to a contiguous run of blocks then it simply falls * back to block_read_full_folio(). * * Why is this? If a page's completion depends on a number of different BIOs * which can complete in any order (or at the same time) then determining the * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ static void mpage_read_end_io(struct bio *bio) { struct folio_iter fi; int err = blk_status_to_errno(bio->bi_status); bio_for_each_folio_all(fi, bio) folio_end_read(fi.folio, err == 0); bio_put(bio); } static void mpage_write_end_io(struct bio *bio) { struct folio_iter fi; int err = blk_status_to_errno(bio->bi_status); bio_for_each_folio_all(fi, bio) { if (err) mapping_set_error(fi.folio->mapping, err); folio_end_writeback(fi.folio); } bio_put(bio); } static struct bio *mpage_bio_submit_read(struct bio *bio) { bio->bi_end_io = mpage_read_end_io; guard_bio_eod(bio); submit_bio(bio); return NULL; } static struct bio *mpage_bio_submit_write(struct bio *bio) { bio->bi_end_io = mpage_write_end_io; guard_bio_eod(bio); submit_bio(bio); return NULL; } /* * support function for mpage_readahead. The fs supplied get_block might * return an up to date buffer. This is used to map that buffer into * the page, which allows read_folio to avoid triggering a duplicate call * to get_block. * * The idea is to avoid adding buffers to pages that don't already have * them. So when the buffer is up to date and the page size == block size, * this marks the page up to date instead of adding new buffers. */ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh, int page_block) { struct inode *inode = folio->mapping->host; struct buffer_head *page_bh, *head; int block = 0; head = folio_buffers(folio); if (!head) { /* * don't make any buffers if there is only one buffer on * the folio and the folio just needs to be set up to date */ if (inode->i_blkbits == folio_shift(folio) && buffer_uptodate(bh)) { folio_mark_uptodate(folio); return; } head = create_empty_buffers(folio, i_blocksize(inode), 0); } page_bh = head; do { if (block == page_block) { page_bh->b_state = bh->b_state; page_bh->b_bdev = bh->b_bdev; page_bh->b_blocknr = bh->b_blocknr; break; } page_bh = page_bh->b_this_page; block++; } while (page_bh != head); } struct mpage_readpage_args { struct bio *bio; struct folio *folio; unsigned int nr_pages; bool is_readahead; sector_t last_block_in_bio; struct buffer_head map_bh; unsigned long first_logical_block; get_block_t *get_block; }; /* * This is the worker routine which does all the work of mapping the disk * blocks and constructs largest possible bios, submits them for IO if the * blocks are not contiguous on the disk. * * We pass a buffer_head back and forth and use its buffer_mapped() flag to * represent the validity of its disk mapping and to decide when to do the next * get_block() call. */ static void do_mpage_readpage(struct mpage_readpage_args *args) { struct folio *folio = args->folio; struct inode *inode = folio->mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_folio = folio_size(folio) >> blkbits; const unsigned blocksize = 1 << blkbits; struct buffer_head *map_bh = &args->map_bh; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t first_block; unsigned page_block; unsigned first_hole = blocks_per_folio; struct block_device *bdev = NULL; int length; int fully_mapped = 1; blk_opf_t opf = REQ_OP_READ; unsigned nblocks; unsigned relative_block; gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); if (args->is_readahead) { opf |= REQ_RAHEAD; gfp |= __GFP_NORETRY | __GFP_NOWARN; } if (folio_buffers(folio)) goto confused; block_in_file = folio_pos(folio) >> blkbits; last_block = block_in_file + ((args->nr_pages * PAGE_SIZE) >> blkbits); last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; /* * Map blocks using the result from the previous get_blocks call first. */ nblocks = map_bh->b_size >> blkbits; if (buffer_mapped(map_bh) && block_in_file > args->first_logical_block && block_in_file < (args->first_logical_block + nblocks)) { unsigned map_offset = block_in_file - args->first_logical_block; unsigned last = nblocks - map_offset; first_block = map_bh->b_blocknr + map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { clear_buffer_mapped(map_bh); break; } if (page_block == blocks_per_folio) break; page_block++; block_in_file++; } bdev = map_bh->b_bdev; } /* * Then do more get_blocks calls until we are done with this folio. */ map_bh->b_folio = folio; while (page_block < blocks_per_folio) { map_bh->b_state = 0; map_bh->b_size = 0; if (block_in_file < last_block) { map_bh->b_size = (last_block-block_in_file) << blkbits; if (args->get_block(inode, block_in_file, map_bh, 0)) goto confused; args->first_logical_block = block_in_file; } if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_folio) first_hole = page_block; page_block++; block_in_file++; continue; } /* some filesystems will copy data into the page during * the get_block call, in which case we don't want to * read it again. map_buffer_to_folio copies the data * we just collected from get_block into the folio's buffers * so read_folio doesn't have to repeat the get_block call */ if (buffer_uptodate(map_bh)) { map_buffer_to_folio(folio, map_bh, page_block); goto confused; } if (first_hole != blocks_per_folio) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ if (!page_block) first_block = map_bh->b_blocknr; else if (first_block + page_block != map_bh->b_blocknr) goto confused; nblocks = map_bh->b_size >> blkbits; for (relative_block = 0; ; relative_block++) { if (relative_block == nblocks) { clear_buffer_mapped(map_bh); break; } else if (page_block == blocks_per_folio) break; page_block++; block_in_file++; } bdev = map_bh->b_bdev; } if (first_hole != blocks_per_folio) { folio_zero_segment(folio, first_hole << blkbits, folio_size(folio)); if (first_hole == 0) { folio_mark_uptodate(folio); folio_unlock(folio); goto out; } } else if (fully_mapped) { folio_set_mappedtodisk(folio); } /* * This folio will go to BIO. Do we need to send this BIO off first? */ if (args->bio && (args->last_block_in_bio != first_block - 1)) args->bio = mpage_bio_submit_read(args->bio); alloc_new: if (args->bio == NULL) { args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf, gfp); if (args->bio == NULL) goto confused; args->bio->bi_iter.bi_sector = first_block << (blkbits - 9); } length = first_hole << blkbits; if (!bio_add_folio(args->bio, folio, length, 0)) { args->bio = mpage_bio_submit_read(args->bio); goto alloc_new; } relative_block = block_in_file - args->first_logical_block; nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_folio)) args->bio = mpage_bio_submit_read(args->bio); else args->last_block_in_bio = first_block + blocks_per_folio - 1; out: return; confused: if (args->bio) args->bio = mpage_bio_submit_read(args->bio); if (!folio_test_uptodate(folio)) block_read_full_folio(folio, args->get_block); else folio_unlock(folio); goto out; } /** * mpage_readahead - start reads against pages * @rac: Describes which pages to read. * @get_block: The filesystem's block mapper function. * * This function walks the pages and the blocks within each page, building and * emitting large BIOs. * * If anything unusual happens, such as: * * - encountering a page which has buffers * - encountering a page which has a non-hole after a hole * - encountering a page with non-contiguous blocks * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: * the end-of-file on blocksize < PAGE_SIZE setups. * * BH_Boundary explanation: * * There is a problem. The mpage read code assembles several pages, gets all * their disk mappings, and then submits them all. That's fine, but obtaining * the disk mappings may require I/O. Reads of indirect blocks, for example. * * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be * submitted in the following order: * * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 * * because the indirect block has to be read to get the mappings of blocks * 13,14,15,16. Obviously, this impacts performance. * * So what we do it to allow the filesystem's get_block() function to set * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block * after this one will require I/O against a block which is probably close to * this one. So you should push what I/O you have currently accumulated. * * This all causes the disk requests to be issued in the correct order. */ void mpage_readahead(struct readahead_control *rac, get_block_t get_block) { struct folio *folio; struct mpage_readpage_args args = { .get_block = get_block, .is_readahead = true, }; while ((folio = readahead_folio(rac))) { prefetchw(&folio->flags); args.folio = folio; args.nr_pages = readahead_count(rac); do_mpage_readpage(&args); /* * If read ahead failed synchronously, it may cause by removed * device, or some filesystem metadata error. */ if (!folio_test_locked(folio) && !folio_test_uptodate(folio)) break; } if (args.bio) mpage_bio_submit_read(args.bio); } EXPORT_SYMBOL(mpage_readahead); /* * This isn't called much at all */ int mpage_read_folio(struct folio *folio, get_block_t get_block) { struct mpage_readpage_args args = { .folio = folio, .nr_pages = folio_nr_pages(folio), .get_block = get_block, }; do_mpage_readpage(&args); if (args.bio) mpage_bio_submit_read(args.bio); return 0; } EXPORT_SYMBOL(mpage_read_folio); /* * Writing is not so simple. * * If the page has buffers then they will be used for obtaining the disk * mapping. We only support pages which are fully mapped-and-dirty, with a * special case for pages which are unmapped at the end: end-of-file. * * If the page has no buffers (preferred) then the page is mapped here. * * If all blocks are found to be contiguous then the page can go into the * BIO. Otherwise fall back to the mapping's writepage(). * * FIXME: This code wants an estimate of how many pages are still to be * written, so it can intelligently allocate a suitably-sized BIO. For now, * just allocate full-size (16-page) BIOs. */ struct mpage_data { struct bio *bio; sector_t last_block_in_bio; get_block_t *get_block; }; /* * We have our BIO, so we can now mark the buffers clean. Make * sure to only clean buffers which we know we'll be writing. */ static void clean_buffers(struct folio *folio, unsigned first_unmapped) { unsigned buffer_counter = 0; struct buffer_head *bh, *head = folio_buffers(folio); if (!head) return; bh = head; do { if (buffer_counter++ == first_unmapped) break; clear_buffer_dirty(bh); bh = bh->b_this_page; } while (bh != head); /* * we cannot drop the bh if the page is not uptodate or a concurrent * read_folio would fail to serialize with the bh and it would read from * disk before we reach the platter. */ if (buffer_heads_over_limit && folio_test_uptodate(folio)) try_to_free_buffers(folio); } static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio, struct mpage_data *mpd) { struct bio *bio = mpd->bio; struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_folio = folio_size(folio) >> blkbits; sector_t last_block; sector_t block_in_file; sector_t first_block; unsigned page_block; unsigned first_unmapped = blocks_per_folio; struct block_device *bdev = NULL; int boundary = 0; sector_t boundary_block = 0; struct block_device *boundary_bdev = NULL; size_t length; struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; struct buffer_head *head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; /* If they're all mapped and dirty, do it */ page_block = 0; do { BUG_ON(buffer_locked(bh)); if (!buffer_mapped(bh)) { /* * unmapped dirty buffers are created by * block_dirty_folio -> mmapped data */ if (buffer_dirty(bh)) goto confused; if (first_unmapped == blocks_per_folio) first_unmapped = page_block; continue; } if (first_unmapped != blocks_per_folio) goto confused; /* hole -> non-hole */ if (!buffer_dirty(bh) || !buffer_uptodate(bh)) goto confused; if (page_block) { if (bh->b_blocknr != first_block + page_block) goto confused; } else { first_block = bh->b_blocknr; } page_block++; boundary = buffer_boundary(bh); if (boundary) { boundary_block = bh->b_blocknr; boundary_bdev = bh->b_bdev; } bdev = bh->b_bdev; } while ((bh = bh->b_this_page) != head); if (first_unmapped) goto page_is_mapped; /* * Page has buffers, but they are all unmapped. The page was * created by pagein or read over a hole which was handled by * block_read_full_folio(). If this address_space is also * using mpage_readahead then this can rarely happen. */ goto confused; } /* * The page has no buffers: map it to disk */ BUG_ON(!folio_test_uptodate(folio)); block_in_file = folio_pos(folio) >> blkbits; /* * Whole page beyond EOF? Skip allocating blocks to avoid leaking * space. */ if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits) goto page_is_mapped; last_block = (i_size - 1) >> blkbits; map_bh.b_folio = folio; for (page_block = 0; page_block < blocks_per_folio; ) { map_bh.b_state = 0; map_bh.b_size = 1 << blkbits; if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (!buffer_mapped(&map_bh)) goto confused; if (buffer_new(&map_bh)) clean_bdev_bh_alias(&map_bh); if (buffer_boundary(&map_bh)) { boundary_block = map_bh.b_blocknr; boundary_bdev = map_bh.b_bdev; } if (page_block) { if (map_bh.b_blocknr != first_block + page_block) goto confused; } else { first_block = map_bh.b_blocknr; } page_block++; boundary = buffer_boundary(&map_bh); bdev = map_bh.b_bdev; if (block_in_file == last_block) break; block_in_file++; } BUG_ON(page_block == 0); first_unmapped = page_block; page_is_mapped: /* Don't bother writing beyond EOF, truncate will discard the folio */ if (folio_pos(folio) >= i_size) goto confused; length = folio_size(folio); if (folio_pos(folio) + length > i_size) { /* * The page straddles i_size. It must be zeroed out on each * and every writepage invocation because it may be mmapped. * "A file is mapped in multiples of the page size. For a file * that is not a multiple of the page size, the remaining memory * is zeroed when mapped, and writes to that region are not * written out to the file." */ length = i_size - folio_pos(folio); folio_zero_segment(folio, length, folio_size(folio)); } /* * This page will go to BIO. Do we need to send this BIO off first? */ if (bio && mpd->last_block_in_bio != first_block - 1) bio = mpage_bio_submit_write(bio); alloc_new: if (bio == NULL) { bio = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE | wbc_to_write_flags(wbc), GFP_NOFS); bio->bi_iter.bi_sector = first_block << (blkbits - 9); wbc_init_bio(wbc, bio); bio->bi_write_hint = inode->i_write_hint; } /* * Must try to add the page before marking the buffer clean or * the confused fail path above (OOM) will be very confused when * it finds all bh marked clean (i.e. it will not write anything) */ wbc_account_cgroup_owner(wbc, folio, folio_size(folio)); length = first_unmapped << blkbits; if (!bio_add_folio(bio, folio, length, 0)) { bio = mpage_bio_submit_write(bio); goto alloc_new; } clean_buffers(folio, first_unmapped); BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); folio_unlock(folio); if (boundary || (first_unmapped != blocks_per_folio)) { bio = mpage_bio_submit_write(bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); } } else { mpd->last_block_in_bio = first_block + blocks_per_folio - 1; } goto out; confused: if (bio) bio = mpage_bio_submit_write(bio); /* * The caller has a ref on the inode, so *mapping is stable */ ret = block_write_full_folio(folio, wbc, mpd->get_block); mapping_set_error(mapping, ret); out: mpd->bio = bio; return ret; } /** * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. * * This is a library function, which implements the writepages() * address_space_operation. */ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { struct mpage_data mpd = { .get_block = get_block, }; struct folio *folio = NULL; struct blk_plug plug; int error; blk_start_plug(&plug); while ((folio = writeback_iter(mapping, wbc, folio, &error))) error = mpage_write_folio(wbc, folio, &mpd); if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); return error; } EXPORT_SYMBOL(mpage_writepages);
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/affs/bitmap.c * * (c) 1996 Hans-Joachim Widmaier * * bitmap.c contains the code that handles all bitmap related stuff - * block allocation, deallocation, calculation of free space. */ #include <linux/slab.h> #include "affs.h" u32 affs_count_free_blocks(struct super_block *sb) { struct affs_bm_info *bm; u32 free; int i; pr_debug("%s()\n", __func__); if (sb_rdonly(sb)) return 0; mutex_lock(&AFFS_SB(sb)->s_bmlock); bm = AFFS_SB(sb)->s_bitmap; free = 0; for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--) free += bm->bm_free; mutex_unlock(&AFFS_SB(sb)->s_bmlock); return free; } void affs_free_block(struct super_block *sb, u32 block) { struct affs_sb_info *sbi = AFFS_SB(sb); struct affs_bm_info *bm; struct buffer_head *bh; u32 blk, bmap, bit, mask, tmp; __be32 *data; pr_debug("%s(%u)\n", __func__, block); if (block > sbi->s_partition_size) goto err_range; blk = block - sbi->s_reserved; bmap = blk / sbi->s_bmap_bits; bit = blk % sbi->s_bmap_bits; bm = &sbi->s_bitmap[bmap]; mutex_lock(&sbi->s_bmlock); bh = sbi->s_bmap_bh; if (sbi->s_last_bmap != bmap) { affs_brelse(bh); bh = affs_bread(sb, bm->bm_key); if (!bh) goto err_bh_read; sbi->s_bmap_bh = bh; sbi->s_last_bmap = bmap; } mask = 1 << (bit & 31); data = (__be32 *)bh->b_data + bit / 32 + 1; /* mark block free */ tmp = be32_to_cpu(*data); if (tmp & mask) goto err_free; *data = cpu_to_be32(tmp | mask); /* fix checksum */ tmp = be32_to_cpu(*(__be32 *)bh->b_data); *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask); mark_buffer_dirty(bh); affs_mark_sb_dirty(sb); bm->bm_free++; mutex_unlock(&sbi->s_bmlock); return; err_free: affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block); mutex_unlock(&sbi->s_bmlock); return; err_bh_read: affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key); sbi->s_bmap_bh = NULL; sbi->s_last_bmap = ~0; mutex_unlock(&sbi->s_bmlock); return; err_range: affs_error(sb, "affs_free_block","Block %u outside partition", block); } /* * Allocate a block in the given allocation zone. * Since we have to byte-swap the bitmap on little-endian * machines, this is rather expensive. Therefore we will * preallocate up to 16 blocks from the same word, if * possible. We are not doing preallocations in the * header zone, though. */ u32 affs_alloc_block(struct inode *inode, u32 goal) { struct super_block *sb; struct affs_sb_info *sbi; struct affs_bm_info *bm; struct buffer_head *bh; __be32 *data, *enddata; u32 blk, bmap, bit, mask, mask2, tmp; int i; sb = inode->i_sb; sbi = AFFS_SB(sb); pr_debug("balloc(inode=%lu,goal=%u): ", inode->i_ino, goal); if (AFFS_I(inode)->i_pa_cnt) { pr_debug("%d\n", AFFS_I(inode)->i_lastalloc+1); AFFS_I(inode)->i_pa_cnt--; return ++AFFS_I(inode)->i_lastalloc; } if (!goal || goal > sbi->s_partition_size) { if (goal) affs_warning(sb, "affs_balloc", "invalid goal %d", goal); //if (!AFFS_I(inode)->i_last_block) // affs_warning(sb, "affs_balloc", "no last alloc block"); goal = sbi->s_reserved; } blk = goal - sbi->s_reserved; bmap = blk / sbi->s_bmap_bits; bm = &sbi->s_bitmap[bmap]; mutex_lock(&sbi->s_bmlock); if (bm->bm_free) goto find_bmap_bit; find_bmap: /* search for the next bmap buffer with free bits */ i = sbi->s_bmap_count; do { if (--i < 0) goto err_full; bmap++; bm++; if (bmap < sbi->s_bmap_count) continue; /* restart search at zero */ bmap = 0; bm = sbi->s_bitmap; } while (!bm->bm_free); blk = bmap * sbi->s_bmap_bits; find_bmap_bit: bh = sbi->s_bmap_bh; if (sbi->s_last_bmap != bmap) { affs_brelse(bh); bh = affs_bread(sb, bm->bm_key); if (!bh) goto err_bh_read; sbi->s_bmap_bh = bh; sbi->s_last_bmap = bmap; } /* find an unused block in this bitmap block */ bit = blk % sbi->s_bmap_bits; data = (__be32 *)bh->b_data + bit / 32 + 1; enddata = (__be32 *)((u8 *)bh->b_data + sb->s_blocksize); mask = ~0UL << (bit & 31); blk &= ~31UL; tmp = be32_to_cpu(*data); if (tmp & mask) goto find_bit; /* scan the rest of the buffer */ do { blk += 32; if (++data >= enddata) /* didn't find something, can only happen * if scan didn't start at 0, try next bmap */ goto find_bmap; } while (!*data); tmp = be32_to_cpu(*data); mask = ~0; find_bit: /* finally look for a free bit in the word */ bit = ffs(tmp & mask) - 1; blk += bit + sbi->s_reserved; mask2 = mask = 1 << (bit & 31); AFFS_I(inode)->i_lastalloc = blk; /* prealloc as much as possible within this word */ while ((mask2 <<= 1)) { if (!(tmp & mask2)) break; AFFS_I(inode)->i_pa_cnt++; mask |= mask2; } bm->bm_free -= AFFS_I(inode)->i_pa_cnt + 1; *data = cpu_to_be32(tmp & ~mask); /* fix checksum */ tmp = be32_to_cpu(*(__be32 *)bh->b_data); *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask); mark_buffer_dirty(bh); affs_mark_sb_dirty(sb); mutex_unlock(&sbi->s_bmlock); pr_debug("%d\n", blk); return blk; err_bh_read: affs_error(sb,"affs_read_block","Cannot read bitmap block %u", bm->bm_key); sbi->s_bmap_bh = NULL; sbi->s_last_bmap = ~0; err_full: mutex_unlock(&sbi->s_bmlock); pr_debug("failed\n"); return 0; } int affs_init_bitmap(struct super_block *sb, int *flags) { struct affs_bm_info *bm; struct buffer_head *bmap_bh = NULL, *bh = NULL; __be32 *bmap_blk; u32 size, blk, end, offset, mask; int i, res = 0; struct affs_sb_info *sbi = AFFS_SB(sb); if (*flags & SB_RDONLY) return 0; if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) { pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id); *flags |= SB_RDONLY; return 0; } sbi->s_last_bmap = ~0; sbi->s_bmap_bh = NULL; sbi->s_bmap_bits = sb->s_blocksize * 8 - 32; sbi->s_bmap_count = (sbi->s_partition_size - sbi->s_reserved + sbi->s_bmap_bits - 1) / sbi->s_bmap_bits; size = sbi->s_bmap_count * sizeof(*bm); bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL); if (!sbi->s_bitmap) { pr_err("Bitmap allocation failed\n"); return -ENOMEM; } bmap_blk = (__be32 *)sbi->s_root_bh->b_data; blk = sb->s_blocksize / 4 - 49; end = blk + 25; for (i = sbi->s_bmap_count; i > 0; bm++, i--) { affs_brelse(bh); bm->bm_key = be32_to_cpu(bmap_blk[blk]); bh = affs_bread(sb, bm->bm_key); if (!bh) { pr_err("Cannot read bitmap\n"); res = -EIO; goto out; } if (affs_checksum_block(sb, bh)) { pr_warn("Bitmap %u invalid - mounting %s read only.\n", bm->bm_key, sb->s_id); *flags |= SB_RDONLY; goto out; } pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key); bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); /* Don't try read the extension if this is the last block, * but we also need the right bm pointer below */ if (++blk < end || i == 1) continue; if (bmap_bh) affs_brelse(bmap_bh); bmap_bh = affs_bread(sb, be32_to_cpu(bmap_blk[blk])); if (!bmap_bh) { pr_err("Cannot read bitmap extension\n"); res = -EIO; goto out; } bmap_blk = (__be32 *)bmap_bh->b_data; blk = 0; end = sb->s_blocksize / 4 - 1; } offset = (sbi->s_partition_size - sbi->s_reserved) % sbi->s_bmap_bits; mask = ~(0xFFFFFFFFU << (offset & 31)); pr_debug("last word: %d %d %d\n", offset, offset / 32 + 1, mask); offset = offset / 32 + 1; if (mask) { u32 old, new; /* Mark unused bits in the last word as allocated */ old = be32_to_cpu(((__be32 *)bh->b_data)[offset]); new = old & mask; //if (old != new) { ((__be32 *)bh->b_data)[offset] = cpu_to_be32(new); /* fix checksum */ //new -= old; //old = be32_to_cpu(*(__be32 *)bh->b_data); //*(__be32 *)bh->b_data = cpu_to_be32(old - new); //mark_buffer_dirty(bh); //} /* correct offset for the bitmap count below */ //offset++; } while (++offset < sb->s_blocksize / 4) ((__be32 *)bh->b_data)[offset] = 0; ((__be32 *)bh->b_data)[0] = 0; ((__be32 *)bh->b_data)[0] = cpu_to_be32(-affs_checksum_block(sb, bh)); mark_buffer_dirty(bh); /* recalculate bitmap count for last block */ bm--; bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); out: affs_brelse(bh); affs_brelse(bmap_bh); return res; } void affs_free_bitmap(struct super_block *sb) { struct affs_sb_info *sbi = AFFS_SB(sb); if (!sbi->s_bitmap) return; affs_brelse(sbi->s_bmap_bh); sbi->s_bmap_bh = NULL; sbi->s_last_bmap = ~0; kfree(sbi->s_bitmap); sbi->s_bitmap = NULL; }
3 4 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/ch11.h> #define TEST_SE0_NAK_PID 0x0101 #define TEST_J_PID 0x0102 #define TEST_K_PID 0x0103 #define TEST_PACKET_PID 0x0104 #define TEST_HS_HOST_PORT_SUSPEND_RESUME 0x0106 #define TEST_SINGLE_STEP_GET_DEV_DESC 0x0107 #define TEST_SINGLE_STEP_SET_FEATURE 0x0108 extern const struct usb_device_id *usb_device_match_id(struct usb_device *udev, const struct usb_device_id *id); /* * A list of USB hubs which requires to disable the power * to the port before starting the testing procedures. */ static const struct usb_device_id ehset_hub_list[] = { { USB_DEVICE(0x0424, 0x4502) }, { USB_DEVICE(0x0424, 0x4913) }, { USB_DEVICE(0x0451, 0x8027) }, { } }; static int ehset_prepare_port_for_testing(struct usb_device *hub_udev, u16 portnum) { int ret = 0; /* * The USB2.0 spec chapter 11.24.2.13 says that the USB port which is * going under test needs to be put in suspend before sending the * test command. Most hubs don't enforce this precondition, but there * are some hubs which needs to disable the power to the port before * starting the test. */ if (usb_device_match_id(hub_udev, ehset_hub_list)) { ret = usb_control_msg_send(hub_udev, 0, USB_REQ_CLEAR_FEATURE, USB_RT_PORT, USB_PORT_FEAT_ENABLE, portnum, NULL, 0, 1000, GFP_KERNEL); /* * Wait for the port to be disabled. It's an arbitrary value * which worked every time. */ msleep(100); } else { /* * For the hubs which are compliant with the spec, * put the port in SUSPEND. */ ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); } return ret; } static int ehset_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret = -EINVAL; struct usb_device *dev = interface_to_usbdev(intf); struct usb_device *hub_udev = dev->parent; struct usb_device_descriptor buf; u8 portnum = dev->portnum; u16 test_pid = le16_to_cpu(dev->descriptor.idProduct); switch (test_pid) { case TEST_SE0_NAK_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_SE0_NAK << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_J_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_J << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_K_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_K << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_PACKET_PID: ret = ehset_prepare_port_for_testing(hub_udev, portnum); if (ret < 0) break; ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (USB_TEST_PACKET << 8) | portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_HS_HOST_PORT_SUSPEND_RESUME: /* Test: wait for 15secs -> suspend -> 15secs delay -> resume */ msleep(15 * 1000); ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); if (ret < 0) break; msleep(15 * 1000); ret = usb_control_msg_send(hub_udev, 0, USB_REQ_CLEAR_FEATURE, USB_RT_PORT, USB_PORT_FEAT_SUSPEND, portnum, NULL, 0, 1000, GFP_KERNEL); break; case TEST_SINGLE_STEP_GET_DEV_DESC: /* Test: wait for 15secs -> GetDescriptor request */ msleep(15 * 1000); ret = usb_control_msg_recv(dev, 0, USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, &buf, USB_DT_DEVICE_SIZE, USB_CTRL_GET_TIMEOUT, GFP_KERNEL); break; case TEST_SINGLE_STEP_SET_FEATURE: /* * GetDescriptor SETUP request -> 15secs delay -> IN & STATUS * * Note, this test is only supported on root hubs since the * SetPortFeature handling can only be done inside the HCD's * hub_control callback function. */ if (hub_udev != dev->bus->root_hub) { dev_err(&intf->dev, "SINGLE_STEP_SET_FEATURE test only supported on root hub\n"); break; } ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE, USB_RT_PORT, USB_PORT_FEAT_TEST, (6 << 8) | portnum, NULL, 0, 60 * 1000, GFP_KERNEL); break; default: dev_err(&intf->dev, "%s: unsupported PID: 0x%x\n", __func__, test_pid); } return ret; } static void ehset_disconnect(struct usb_interface *intf) { } static const struct usb_device_id ehset_id_table[] = { { USB_DEVICE(0x1a0a, TEST_SE0_NAK_PID) }, { USB_DEVICE(0x1a0a, TEST_J_PID) }, { USB_DEVICE(0x1a0a, TEST_K_PID) }, { USB_DEVICE(0x1a0a, TEST_PACKET_PID) }, { USB_DEVICE(0x1a0a, TEST_HS_HOST_PORT_SUSPEND_RESUME) }, { USB_DEVICE(0x1a0a, TEST_SINGLE_STEP_GET_DEV_DESC) }, { USB_DEVICE(0x1a0a, TEST_SINGLE_STEP_SET_FEATURE) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ehset_id_table); static struct usb_driver ehset_driver = { .name = "usb_ehset_test", .probe = ehset_probe, .disconnect = ehset_disconnect, .id_table = ehset_id_table, }; module_usb_driver(ehset_driver); MODULE_DESCRIPTION("USB Driver for EHSET Test Fixture"); MODULE_LICENSE("GPL v2");
1 1 2 2 5 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/init.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr)) void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_object *obj = nft_objref_priv(expr); obj->ops->eval(obj, regs, pkt); } static int nft_objref_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_object *obj = nft_objref_priv(expr); u8 genmask = nft_genmask_next(ctx->net); u32 objtype; if (!tb[NFTA_OBJREF_IMM_NAME] || !tb[NFTA_OBJREF_IMM_TYPE]) return -EINVAL; objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); obj = nft_obj_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, genmask); if (IS_ERR(obj)) return -ENOENT; if (!nft_use_inc(&obj->use)) return -EMFILE; nft_objref_priv(expr) = obj; return 0; } static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_object *obj = nft_objref_priv(expr); if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->ops->type->type))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_objref_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_object *obj = nft_objref_priv(expr); if (phase == NFT_TRANS_COMMIT) return; nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_object *obj = nft_objref_priv(expr); nft_use_inc_restore(&obj->use); } static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), .eval = nft_objref_eval, .init = nft_objref_init, .activate = nft_objref_activate, .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, .reduce = NFT_REDUCE_READONLY, }; struct nft_objref_map { struct nft_set *set; u8 sreg; struct nft_set_binding binding; }; void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_objref_map *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; struct net *net = nft_net(pkt); const struct nft_set_ext *ext; struct nft_object *obj; ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]); if (!ext) { ext = nft_set_catchall_lookup(net, set); if (!ext) { regs->verdict.code = NFT_BREAK; return; } } obj = *nft_set_ext_obj(ext); obj->ops->eval(obj, regs, pkt); } static int nft_objref_map_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_objref_map *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; int err; set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME], tb[NFTA_OBJREF_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg, set->klen); if (err < 0) return err; priv->binding.flags = set->flags & NFT_SET_OBJECT; err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) return err; priv->set = set; return 0; } static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_objref_map *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) || nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_objref_map_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr); nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); } static void nft_objref_map_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_objref_map *priv = nft_expr_priv(expr); nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_objref_map *priv = nft_expr_priv(expr); nf_tables_destroy_set(ctx, priv->set); } static const struct nft_expr_ops nft_objref_map_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * nft_objref_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { if (tb[NFTA_OBJREF_SET_SREG] && (tb[NFTA_OBJREF_SET_NAME] || tb[NFTA_OBJREF_SET_ID])) return &nft_objref_map_ops; else if (tb[NFTA_OBJREF_IMM_NAME] && tb[NFTA_OBJREF_IMM_TYPE]) return &nft_objref_ops; return ERR_PTR(-EOPNOTSUPP); } static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, }; struct nft_expr_type nft_objref_type __read_mostly = { .name = "objref", .select_ops = nft_objref_select_ops, .policy = nft_objref_policy, .maxattr = NFTA_OBJREF_MAX, .owner = THIS_MODULE, };
5 5 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA508 chip based cameras subdriver * * Copyright (C) 2009 Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca508" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA508 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 subtype; #define CreativeVista 0 #define HamaUSBSightcam 1 #define HamaUSBSightcam2 2 #define IntelEasyPCCamera 3 #define MicroInnovationIC200 4 #define ViewQuestVQ110 5 }; static const struct v4l2_pix_format sif_mode[] = { {160, 120, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* Frame packet header offsets for the spca508 */ #define SPCA508_OFFSET_DATA 37 /* * Initialization data: this is the first set-up data written to the * device (before the open data). */ static const u16 spca508_init_data[][2] = { {0x0000, 0x870b}, {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8110}, /* Disable all outputs */ /* READ {0x0000, 0x8114} -> 0000: 00 */ {0x0000, 0x8114}, /* SW GPIO data */ {0x0008, 0x8110}, /* Enable charge pump output */ {0x0002, 0x8116}, /* 200 kHz pump clock */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE:) */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8111}, /* Normal mode (not reset) */ {0x0098, 0x8110}, /* Enable charge pump output, sync.serial,external 2x clock */ {0x000d, 0x8114}, /* SW GPIO data */ {0x0002, 0x8116}, /* 200 kHz pump clock */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* --------------------------------------- */ {0x000f, 0x8402}, /* memory bank */ {0x0000, 0x8403}, /* ... address */ /* --------------------------------------- */ /* 0x88__ is Synchronous Serial Interface. */ /* TBD: This table could be expressed more compactly */ /* using spca508_write_i2c_vector(). */ /* TBD: Should see if the values in spca50x_i2c_data */ /* would work with the VQ110 instead of the values */ /* below. */ {0x00c0, 0x8804}, /* SSI slave addr */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0080, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0000, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0011, 0x8801}, /* SSI reg addr */ {0x0040, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0013, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0014, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0015, 0x8801}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0016, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0017, 0x8801}, {0x0036, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0018, 0x8801}, {0x00ec, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001a, 0x8801}, {0x0094, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001b, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0027, 0x8801}, {0x00a2, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0028, 0x8801}, {0x0040, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002b, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002c, 0x8801}, {0x00fe, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002d, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0038, 0x8801}, {0x0083, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0033, 0x8801}, {0x0081, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0034, 0x8801}, {0x004a, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0039, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0010, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0006, 0x8801}, {0x0058, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0000, 0x8801}, {0x0004, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0040, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0041, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0042, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0043, 0x8801}, {0x0028, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0044, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0045, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0046, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0047, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0048, 0x8801}, {0x004c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0049, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004b, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* --------------------------------------- */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0001, 0x870c}, /* CKOx2 output */ /* --------------------------------------- */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x0001, 0x8606}, /* reserved */ {0x0064, 0x8607}, /* Line memory read counter (H) 0x6480=25,728 */ {0x002a, 0x8601}, /* CDSP sharp interpolation mode, * line sel for color sep, edge enhance enab */ {0x0000, 0x8602}, /* optical black level for user settng = 0 */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x000a, 0x8603}, /* optical black level calc mode: * auto; optical black offset = 10 */ {0x00df, 0x865b}, /* Horiz offset for valid pixels (L)=0xdf */ {0x0012, 0x865c}, /* Vert offset for valid lines (L)=0x12 */ /* The following two lines seem to be the "wrong" resolution. */ /* But perhaps these indicate the actual size of the sensor */ /* rather than the size of the current video mode. */ {0x0058, 0x865d}, /* Horiz valid pixels (*4) (L) = 352 */ {0x0048, 0x865e}, /* Vert valid lines (*4) (L) = 288 */ {0x0015, 0x8608}, /* A11 Coef ... */ {0x0030, 0x8609}, {0x00fb, 0x860a}, {0x003e, 0x860b}, {0x00ce, 0x860c}, {0x00f4, 0x860d}, {0x00eb, 0x860e}, {0x00dc, 0x860f}, {0x0039, 0x8610}, {0x0001, 0x8611}, /* R offset for white balance ... */ {0x0000, 0x8612}, {0x0001, 0x8613}, {0x0000, 0x8614}, {0x005b, 0x8651}, /* R gain for white balance ... */ {0x0040, 0x8652}, {0x0060, 0x8653}, {0x0040, 0x8654}, {0x0000, 0x8655}, {0x0001, 0x863f}, /* Fixed gamma correction enable, USB control, * lum filter disable, lum noise clip disable */ {0x00a1, 0x8656}, /* Window1 size 256x256, Windows2 size 64x64, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8657}, /* Edge gain high thresh */ {0x0020, 0x8658}, /* Edge gain low thresh */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ /* -------------------------------- */ {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xa908, 0x8802}, {0x0034, 0x8801}, /* SSI reg addr */ {0x00ca, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x1f08, 0x8802}, {0x0006, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* ----- Read back coefs we wrote earlier. */ /* READ { 0x0000, 0x8608 } -> 0000: 15 */ /* READ { 0x0000, 0x8609 } -> 0000: 30 */ /* READ { 0x0000, 0x860a } -> 0000: fb */ /* READ { 0x0000, 0x860b } -> 0000: 3e */ /* READ { 0x0000, 0x860c } -> 0000: ce */ /* READ { 0x0000, 0x860d } -> 0000: f4 */ /* READ { 0x0000, 0x860e } -> 0000: eb */ /* READ { 0x0000, 0x860f } -> 0000: dc */ /* READ { 0x0000, 0x8610 } -> 0000: 39 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xb008, 0x8802}, {0x0006, 0x8801}, {0x007d, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* This chunk is seemingly redundant with */ /* earlier commands (A11 Coef...), but if I disable it, */ /* the image appears too dark. Maybe there was some kind of */ /* reset since the earlier commands, so this is necessary again. */ {0x0015, 0x8608}, {0x0030, 0x8609}, {0xfffb, 0x860a}, {0x003e, 0x860b}, {0xffce, 0x860c}, {0xfff4, 0x860d}, {0xffeb, 0x860e}, {0xffdc, 0x860f}, {0x0039, 0x8610}, {0x0018, 0x8657}, {0x0000, 0x8508}, /* Disable compression. */ /* Previous line was: {0x0021, 0x8508}, * Enable compression. */ {0x0032, 0x850b}, /* compression stuff */ {0x0003, 0x8509}, /* compression stuff */ {0x0011, 0x850a}, /* compression stuff */ {0x0021, 0x850d}, /* compression stuff */ {0x0010, 0x850c}, /* compression stuff */ {0x0003, 0x8500}, /* *** Video mode: 160x120 */ {0x0001, 0x8501}, /* Hardware-dominated snap control */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8617}, /* Window1 start X (*2) */ {0x0008, 0x8618}, /* Window1 start Y (*2) */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0058, 0x8619}, /* Window2 start X (*2) */ {0x0008, 0x861a}, /* Window2 start Y (*2) */ {0x00ff, 0x8615}, /* High lum thresh for white balance */ {0x0000, 0x8616}, /* Low lum thresh for white balance */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ /* READ { 0x0000, 0x8656 } -> 0000: 61 */ {0x0028, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ {0x0010, 0x8801}, /* SSI reg addr */ {0x003e, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0000, 0x8801}, {0x001f, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0001, 0x8602}, /* optical black level for user settning = 1 */ /* Original: */ {0x0023, 0x8700}, /* Clock speed 48Mhz/(3+2)/4= 2.4 Mhz */ {0x000f, 0x8602}, /* optical black level for user settning = 15 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0010, 0x8801}, {0x007b, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x002f, 0x8651}, /* R gain for white balance ... */ {0x0080, 0x8653}, /* READ { 0x0000, 0x8655 } -> 0000: 00 */ {0x0000, 0x8655}, {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE: (ALT=0) ) */ {} }; /* * Initialization data for Intel EasyPC Camera CS110 */ static const u16 spca508cs110_init_data[][2] = { {0x0000, 0x870b}, /* Reset CTL3 */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8111}, /* Normal operation on reset */ {0x0090, 0x8110}, /* External Clock 2x & Synchronous Serial Interface Output */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, /* Initial sequence Synchronous Serial Interface */ {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock two DataByte */ {0x0001, 0x8801}, {0x000a, 0x8805}, /* a - NWG: Dunno what this is about */ {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* 3 Vertical Offset for Valid Lines(L) */ {0x0058, 0x865d}, /* 58 Horizontal Valid Pixel Window(L) */ {0x0006, 0x8660}, /* Nibble data + input order */ {0x000a, 0x8602}, /* Optical black level set to 0x0a */ {0x0000, 0x8603}, /* Optical black level Offset */ /* {0x0000, 0x8611}, * 0 R Offset for white Balance */ /* {0x0000, 0x8612}, * 1 Gr Offset for white Balance */ /* {0x0000, 0x8613}, * 1f B Offset for white Balance */ /* {0x0000, 0x8614}, * f0 Gb Offset for white Balance */ {0x0040, 0x8651}, /* 2b BLUE gain for white balance good at all 60 */ {0x0030, 0x8652}, /* 41 Gr Gain for white Balance (L) */ {0x0035, 0x8653}, /* 26 RED gain for white balance */ {0x0035, 0x8654}, /* 40Gb Gain for white Balance (L) */ {0x0041, 0x863f}, /* Fixed Gamma correction enabled (makes colours look better) */ {0x0000, 0x8655}, /* High bits for white balance*****brightness control*** */ {} }; static const u16 spca508_sightcam_init_data[][2] = { /* This line seems to setup the frame/canvas */ {0x000f, 0x8402}, /* These 6 lines are needed to startup the webcam */ {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, /* This part seems to make the pictures darker? (autobrightness?) */ {0x0001, 0x8801}, {0x0004, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, {0x0004, 0x8801}, {0x00b4, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x00e0, 0x8800}, {0x0007, 0x8801}, {0x000c, 0x8800}, /* This section is just needed, it probably * does something like the previous section, * but the cam won't start if it's not included. */ {0x0014, 0x8801}, {0x0008, 0x8800}, {0x0015, 0x8801}, {0x0067, 0x8800}, {0x0016, 0x8801}, {0x0000, 0x8800}, {0x0017, 0x8801}, {0x0020, 0x8800}, {0x0018, 0x8801}, {0x0044, 0x8800}, /* Makes the picture darker - and the * cam won't start if not included */ {0x001e, 0x8801}, {0x00ea, 0x8800}, {0x001f, 0x8801}, {0x0001, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, /* seems to place the colors ontop of each other #1 */ {0x0006, 0x8704}, {0x0001, 0x870c}, {0x0016, 0x8600}, {0x0002, 0x8606}, /* if not included the pictures becomes _very_ dark */ {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0000, 0x8602}, /* seems to place the colors ontop of each other #2 */ {0x0016, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, /* webcam won't start if not included */ {0x0007, 0x865b}, {0x0001, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, /* adjusts the colors */ {0x0049, 0x8651}, {0x0040, 0x8652}, {0x004c, 0x8653}, {0x0040, 0x8654}, {} }; static const u16 spca508_sightcam2_init_data[][2] = { {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0008, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0009, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000a, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000b, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000c, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000d, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000e, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0007, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000f, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0018, 0x8660}, {0x0010, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0011, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0000, 0x86b0}, {0x0034, 0x86b1}, {0x0000, 0x86b2}, {0x0049, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0012, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0013, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0001, 0x86b0}, {0x00aa, 0x86b1}, {0x0000, 0x86b2}, {0x00e4, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0018, 0x8660}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x0000, 0x8800}, {0x0008, 0x8801}, {0x0005, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x000c, 0x8619}, {0x0004, 0x861a}, {0x00a1, 0x8656}, {0x00c8, 0x8615}, {0x0032, 0x8616}, {0x0030, 0x8112}, {0x0020, 0x8112}, {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x0064, 0x8619}, /* This line starts it all, it is not needed here */ /* since it has been build into the driver */ /* jfm: don't start now */ /* {0x0030, 0x8112}, */ {} }; /* * Initialization data for Creative Webcam Vista */ static const u16 spca508_vista_init_data[][2] = { {0x0008, 0x8200}, /* Clear register */ {0x0000, 0x870b}, /* Reset CTL3 */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8110}, /* Disable everything */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0000, 0x8114}, {0x0003, 0x8111}, {0x0000, 0x8111}, {0x0090, 0x8110}, /* Enable: SSI output, External 2X clock output */ {0x0020, 0x8112}, {0x0000, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, /* Will write 2 bytes (DATA1+DATA2) */ {0x0020, 0x8801}, /* Register address for SSI read/write */ {0x0044, 0x8805}, /* DATA2 */ {0x0004, 0x8800}, /* DATA1 -> write triggered */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003c, 0x8801}, {0x0001, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0001, 0x8801}, {0x000a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x000e, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0028, 0x8801}, {0x002e, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0039, 0x8801}, {0x0013, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003b, 0x8801}, {0x000c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0035, 0x8801}, {0x0028, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0050, 0x8703}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x870c}, /* Select CKOx2 output */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x0023, 0x8601}, {0x0010, 0x8602}, {0x000a, 0x8603}, {0x009a, 0x8600}, {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* Vertical offset for valid lines (L) */ {0x0058, 0x865d}, /* Horizontal valid pixels window (L) */ {0x0048, 0x865e}, /* Vertical valid lines window (L) */ {0x0000, 0x865f}, {0x0006, 0x8660}, /* Enable nibble data input, select nibble input order */ {0x0013, 0x8608}, /* A11 Coeficients for color correction */ {0x0028, 0x8609}, /* Note: these values are confirmed at the end of array */ {0x0005, 0x860a}, /* ... */ {0x0025, 0x860b}, {0x00e1, 0x860c}, {0x00fa, 0x860d}, {0x00f4, 0x860e}, {0x00e8, 0x860f}, {0x0025, 0x8610}, /* A33 Coef. */ {0x00fc, 0x8611}, /* White balance offset: R */ {0x0001, 0x8612}, /* White balance offset: Gr */ {0x00fe, 0x8613}, /* White balance offset: B */ {0x0000, 0x8614}, /* White balance offset: Gb */ {0x0064, 0x8651}, /* R gain for white balance (L) */ {0x0040, 0x8652}, /* Gr gain for white balance (L) */ {0x0066, 0x8653}, /* B gain for white balance (L) */ {0x0040, 0x8654}, /* Gb gain for white balance (L) */ {0x0001, 0x863f}, /* Enable fixed gamma correction */ {0x00a1, 0x8656}, /* Size - Window1: 256x256, Window2: 128x128, * UV division: UV no change, * Enable New edge enhancement */ {0x0018, 0x8657}, /* Edge gain high threshold */ {0x0020, 0x8658}, /* Edge gain low threshold */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ {0x0064, 0x8607}, /* UV filter enable */ {0x0016, 0x8660}, {0x0000, 0x86b0}, /* Bad pixels compensation address */ {0x00dc, 0x86b1}, /* X coord for bad pixels compensation (L) */ {0x0000, 0x86b2}, {0x0009, 0x86b3}, /* Y coord for bad pixels compensation (L) */ {0x0000, 0x86b4}, {0x0001, 0x86b0}, {0x00f5, 0x86b1}, {0x0000, 0x86b2}, {0x00c6, 0x86b3}, {0x0000, 0x86b4}, {0x0002, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d7, 0x86b3}, {0x0000, 0x86b4}, {0x0003, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x0004, 0x86b0}, {0x001d, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x001e, 0x8660}, /* READ { 0x0000, 0x8608 } -> 0000: 13 */ /* READ { 0x0000, 0x8609 } -> 0000: 28 */ /* READ { 0x0000, 0x8610 } -> 0000: 05 */ /* READ { 0x0000, 0x8611 } -> 0000: 25 */ /* READ { 0x0000, 0x8612 } -> 0000: e1 */ /* READ { 0x0000, 0x8613 } -> 0000: fa */ /* READ { 0x0000, 0x8614 } -> 0000: f4 */ /* READ { 0x0000, 0x8615 } -> 0000: e8 */ /* READ { 0x0000, 0x8616 } -> 0000: 25 */ {} }; static int reg_write(struct gspca_dev *gspca_dev, u16 index, u16 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, /* request */ USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write i:0x%04x = 0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } /* read 1 byte */ /* returns: negative is error, pos or zero is data */ static int reg_read(struct gspca_dev *gspca_dev, u16 index) /* wIndex */ { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* register */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 1, 500); /* timeout */ gspca_dbg(gspca_dev, D_USBI, "reg read i:%04x --> %02x\n", index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_read err %d\n", ret); return ret; } return gspca_dev->usb_buf[0]; } /* send 1 or 2 bytes to the sensor via the Synchronous Serial Interface */ static int ssi_w(struct gspca_dev *gspca_dev, u16 reg, u16 val) { int ret, retry; ret = reg_write(gspca_dev, 0x8802, reg >> 8); if (ret < 0) goto out; ret = reg_write(gspca_dev, 0x8801, reg & 0x00ff); if (ret < 0) goto out; if ((reg & 0xff00) == 0x1000) { /* if 2 bytes */ ret = reg_write(gspca_dev, 0x8805, val & 0x00ff); if (ret < 0) goto out; val >>= 8; } ret = reg_write(gspca_dev, 0x8800, val); if (ret < 0) goto out; /* poll until not busy */ retry = 10; for (;;) { ret = reg_read(gspca_dev, 0x8803); if (ret < 0) break; if (gspca_dev->usb_buf[0] == 0) break; if (--retry <= 0) { gspca_err(gspca_dev, "ssi_w busy %02x\n", gspca_dev->usb_buf[0]); ret = -1; break; } msleep(8); } out: return ret; } static int write_vector(struct gspca_dev *gspca_dev, const u16 (*data)[2]) { int ret = 0; while ((*data)[1] != 0) { if ((*data)[1] & 0x8000) { if ((*data)[1] == 0xdd00) /* delay */ msleep((*data)[0]); else ret = reg_write(gspca_dev, (*data)[1], (*data)[0]); } else { ret = ssi_w(gspca_dev, (*data)[1], (*data)[0]); } if (ret < 0) break; data++; } return ret; } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; const u16 (*init_data)[2]; static const u16 (*(init_data_tb[]))[2] = { spca508_vista_init_data, /* CreativeVista 0 */ spca508_sightcam_init_data, /* HamaUSBSightcam 1 */ spca508_sightcam2_init_data, /* HamaUSBSightcam2 2 */ spca508cs110_init_data, /* IntelEasyPCCamera 3 */ spca508cs110_init_data, /* MicroInnovationIC200 4 */ spca508_init_data, /* ViewQuestVQ110 5 */ }; int data1, data2; /* Read from global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 508. */ data1 = reg_read(gspca_dev, 0x8104); data2 = reg_read(gspca_dev, 0x8105); gspca_dbg(gspca_dev, D_PROBE, "Webcam Vendor ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8106); data2 = reg_read(gspca_dev, 0x8107); gspca_dbg(gspca_dev, D_PROBE, "Webcam Product ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8621); gspca_dbg(gspca_dev, D_PROBE, "Window 1 average luminance: %d\n", data1); cam = &gspca_dev->cam; cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); sd->subtype = id->driver_info; init_data = init_data_tb[sd->subtype]; return write_vector(gspca_dev, init_data); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { return 0; } static int sd_start(struct gspca_dev *gspca_dev) { int mode; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_write(gspca_dev, 0x8500, mode); switch (mode) { case 0: case 1: reg_write(gspca_dev, 0x8700, 0x28); /* clock */ break; default: /* case 2: */ /* case 3: */ reg_write(gspca_dev, 0x8700, 0x23); /* clock */ break; } reg_write(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Video ISO disable, Video Drop Packet enable: */ reg_write(gspca_dev, 0x8112, 0x20); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA508_OFFSET_DATA; len -= SPCA508_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); break; case 0xff: /* drop */ break; default: data += 1; len -= 1; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); break; } } static void setbrightness(struct gspca_dev *gspca_dev, s32 brightness) { /* MX seem contrast */ reg_write(gspca_dev, 0x8651, brightness); reg_write(gspca_dev, 0x8652, brightness); reg_write(gspca_dev, 0x8653, brightness); reg_write(gspca_dev, 0x8654, brightness); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0130, 0x0130), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x041e, 0x4018), .driver_info = CreativeVista}, {USB_DEVICE(0x0733, 0x0110), .driver_info = ViewQuestVQ110}, {USB_DEVICE(0x0af9, 0x0010), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x0af9, 0x0011), .driver_info = HamaUSBSightcam2}, {USB_DEVICE(0x8086, 0x0110), .driver_info = IntelEasyPCCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
14 1 13 7 2 4 6 15 15 14 15 15 14 1 12 6 7 11 11 11 2 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * * Changes: */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <asm/string.h> #include <linux/kmod.h> #include <linux/sysctl.h> #include <net/ip_vs.h> EXPORT_SYMBOL(ip_vs_scheduler_err); /* * IPVS scheduler list */ static LIST_HEAD(ip_vs_schedulers); /* semaphore for schedulers */ static DEFINE_MUTEX(ip_vs_sched_mutex); /* * Bind a service with a scheduler */ int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler) { int ret; if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { pr_err("%s(): init error\n", __func__); return ret; } } rcu_assign_pointer(svc->scheduler, scheduler); return 0; } /* * Unbind a service with its scheduler */ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *sched) { struct ip_vs_scheduler *cur_sched; cur_sched = rcu_dereference_protected(svc->scheduler, 1); /* This check proves that old 'sched' was installed */ if (!cur_sched) return; if (sched->done_service) sched->done_service(svc); /* svc->scheduler can be set to NULL only by caller */ } /* * Get scheduler in the scheduler list by name */ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) { struct ip_vs_scheduler *sched; IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); mutex_lock(&ip_vs_sched_mutex); list_for_each_entry(sched, &ip_vs_schedulers, n_list) { /* * Test and get the modules atomically */ if (sched->module && !try_module_get(sched->module)) { /* * This scheduler is just deleted */ continue; } if (strcmp(sched_name, sched->name)==0) { /* HIT */ mutex_unlock(&ip_vs_sched_mutex); return sched; } module_put(sched->module); } mutex_unlock(&ip_vs_sched_mutex); return NULL; } /* * Lookup scheduler and try to load it if it doesn't exist */ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name) { struct ip_vs_scheduler *sched; /* * Search for the scheduler by sched_name */ sched = ip_vs_sched_getbyname(sched_name); /* * If scheduler not found, load the module and search again */ if (sched == NULL) { request_module("ip_vs_%s", sched_name); sched = ip_vs_sched_getbyname(sched_name); } return sched; } void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) { if (scheduler) module_put(scheduler->module); } /* * Common error output helper for schedulers */ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } /* * Register a scheduler in the scheduler list */ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) { struct ip_vs_scheduler *sched; if (!scheduler) { pr_err("%s(): NULL arg\n", __func__); return -EINVAL; } if (!scheduler->name) { pr_err("%s(): NULL scheduler_name\n", __func__); return -EINVAL; } /* increase the module use count */ if (!ip_vs_use_count_inc()) return -ENOENT; mutex_lock(&ip_vs_sched_mutex); if (!list_empty(&scheduler->n_list)) { mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already linked\n", __func__, scheduler->name); return -EINVAL; } /* * Make sure that the scheduler with this name doesn't exist * in the scheduler list. */ list_for_each_entry(sched, &ip_vs_schedulers, n_list) { if (strcmp(scheduler->name, sched->name) == 0) { mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already existed " "in the system\n", __func__, scheduler->name); return -EINVAL; } } /* * Add it into the d-linked scheduler list */ list_add(&scheduler->n_list, &ip_vs_schedulers); mutex_unlock(&ip_vs_sched_mutex); pr_info("[%s] scheduler registered.\n", scheduler->name); return 0; } /* * Unregister a scheduler from the scheduler list */ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) { if (!scheduler) { pr_err("%s(): NULL arg\n", __func__); return -EINVAL; } mutex_lock(&ip_vs_sched_mutex); if (list_empty(&scheduler->n_list)) { mutex_unlock(&ip_vs_sched_mutex); pr_err("%s(): [%s] scheduler is not in the list. failed\n", __func__, scheduler->name); return -EINVAL; } /* * Remove it from the d-linked scheduler list */ list_del(&scheduler->n_list); mutex_unlock(&ip_vs_sched_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); pr_info("[%s] scheduler unregistered.\n", scheduler->name); return 0; }
1 8 8 3 2 4 3 3 1 2 2 2 2 7 6 1 7 5 5 2 2 3 5 1 1 2 2 8 1 3 3 3 3 1 3 1 2 6 2 1 3 3 3 1 1 3 30 1 15 9 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 // SPDX-License-Identifier: GPL-2.0-only /* * drivers/dma-buf/sync_file.c * * Copyright (C) 2012 Google, Inc. */ #include <linux/dma-fence-unwrap.h> #include <linux/export.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/anon_inodes.h> #include <linux/sync_file.h> #include <uapi/linux/sync_file.h> static const struct file_operations sync_file_fops; static struct sync_file *sync_file_alloc(void) { struct sync_file *sync_file; sync_file = kzalloc(sizeof(*sync_file), GFP_KERNEL); if (!sync_file) return NULL; sync_file->file = anon_inode_getfile("sync_file", &sync_file_fops, sync_file, 0); if (IS_ERR(sync_file->file)) goto err; init_waitqueue_head(&sync_file->wq); INIT_LIST_HEAD(&sync_file->cb.node); return sync_file; err: kfree(sync_file); return NULL; } static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) { struct sync_file *sync_file; sync_file = container_of(cb, struct sync_file, cb); wake_up_all(&sync_file->wq); } /** * sync_file_create() - creates a sync file * @fence: fence to add to the sync_fence * * Creates a sync_file containg @fence. This function acquires and additional * reference of @fence for the newly-created &sync_file, if it succeeds. The * sync_file can be released with fput(sync_file->file). Returns the * sync_file or NULL in case of error. */ struct sync_file *sync_file_create(struct dma_fence *fence) { struct sync_file *sync_file; sync_file = sync_file_alloc(); if (!sync_file) return NULL; sync_file->fence = dma_fence_get(fence); return sync_file; } EXPORT_SYMBOL(sync_file_create); static struct sync_file *sync_file_fdget(int fd) { struct file *file = fget(fd); if (!file) return NULL; if (file->f_op != &sync_file_fops) goto err; return file->private_data; err: fput(file); return NULL; } /** * sync_file_get_fence - get the fence related to the sync_file fd * @fd: sync_file fd to get the fence from * * Ensures @fd references a valid sync_file and returns a fence that * represents all fence in the sync_file. On error NULL is returned. */ struct dma_fence *sync_file_get_fence(int fd) { struct sync_file *sync_file; struct dma_fence *fence; sync_file = sync_file_fdget(fd); if (!sync_file) return NULL; fence = dma_fence_get(sync_file->fence); fput(sync_file->file); return fence; } EXPORT_SYMBOL(sync_file_get_fence); /** * sync_file_get_name - get the name of the sync_file * @sync_file: sync_file to get the fence from * @buf: destination buffer to copy sync_file name into * @len: available size of destination buffer. * * Each sync_file may have a name assigned either by the user (when merging * sync_files together) or created from the fence it contains. In the latter * case construction of the name is deferred until use, and so requires * sync_file_get_name(). * * Returns: a string representing the name. */ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) { if (sync_file->user_name[0]) { strscpy(buf, sync_file->user_name, len); } else { struct dma_fence *fence = sync_file->fence; const char __rcu *timeline; const char __rcu *driver; rcu_read_lock(); driver = dma_fence_driver_name(fence); timeline = dma_fence_timeline_name(fence); snprintf(buf, len, "%s-%s%llu-%lld", rcu_dereference(driver), rcu_dereference(timeline), fence->context, fence->seqno); rcu_read_unlock(); } return buf; } /** * sync_file_merge() - merge two sync_files * @name: name of new fence * @a: sync_file a * @b: sync_file b * * Creates a new sync_file which contains copies of all the fences in both * @a and @b. @a and @b remain valid, independent sync_file. Returns the * new merged sync_file or NULL in case of error. */ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; struct dma_fence *fence; sync_file = sync_file_alloc(); if (!sync_file) return NULL; fence = dma_fence_unwrap_merge(a->fence, b->fence); if (!fence) { fput(sync_file->file); return NULL; } sync_file->fence = fence; strscpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; } static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); return 0; } static __poll_t sync_file_poll(struct file *file, poll_table *wait) { struct sync_file *sync_file = file->private_data; poll_wait(file, &sync_file->wq, wait); if (list_empty(&sync_file->cb.node) && !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); } return dma_fence_is_signaled(sync_file->fence) ? EPOLLIN : 0; } static long sync_file_ioctl_merge(struct sync_file *sync_file, unsigned long arg) { int fd = get_unused_fd_flags(O_CLOEXEC); int err; struct sync_file *fence2, *fence3; struct sync_merge_data data; if (fd < 0) return fd; if (copy_from_user(&data, (void __user *)arg, sizeof(data))) { err = -EFAULT; goto err_put_fd; } if (data.flags || data.pad) { err = -EINVAL; goto err_put_fd; } fence2 = sync_file_fdget(data.fd2); if (!fence2) { err = -ENOENT; goto err_put_fd; } data.name[sizeof(data.name) - 1] = '\0'; fence3 = sync_file_merge(data.name, sync_file, fence2); if (!fence3) { err = -ENOMEM; goto err_put_fence2; } data.fence = fd; if (copy_to_user((void __user *)arg, &data, sizeof(data))) { err = -EFAULT; goto err_put_fence3; } fd_install(fd, fence3->file); fput(fence2->file); return 0; err_put_fence3: fput(fence3->file); err_put_fence2: fput(fence2->file); err_put_fd: put_unused_fd(fd); return err; } static int sync_fill_fence_info(struct dma_fence *fence, struct sync_fence_info *info) { const char __rcu *timeline; const char __rcu *driver; rcu_read_lock(); driver = dma_fence_driver_name(fence); timeline = dma_fence_timeline_name(fence); strscpy(info->obj_name, rcu_dereference(timeline), sizeof(info->obj_name)); strscpy(info->driver_name, rcu_dereference(driver), sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); info->timestamp_ns = dma_fence_is_signaled(fence) ? ktime_to_ns(dma_fence_timestamp(fence)) : ktime_set(0, 0); rcu_read_unlock(); return info->status; } static long sync_file_ioctl_fence_info(struct sync_file *sync_file, unsigned long arg) { struct sync_fence_info *fence_info = NULL; struct dma_fence_unwrap iter; struct sync_file_info info; unsigned int num_fences; struct dma_fence *fence; int ret; __u32 size; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; if (info.flags || info.pad) return -EINVAL; num_fences = 0; dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) ++num_fences; /* * Passing num_fences = 0 means that userspace doesn't want to * retrieve any sync_fence_info. If num_fences = 0 we skip filling * sync_fence_info and return the actual number of fences on * info->num_fences. */ if (!info.num_fences) { info.status = dma_fence_get_status(sync_file->fence); goto no_fences; } else { info.status = 1; } if (info.num_fences < num_fences) return -EINVAL; size = num_fences * sizeof(*fence_info); fence_info = kzalloc(size, GFP_KERNEL); if (!fence_info) return -ENOMEM; num_fences = 0; dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) { int status; status = sync_fill_fence_info(fence, &fence_info[num_fences++]); info.status = info.status <= 0 ? info.status : status; } if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info, size)) { ret = -EFAULT; goto out; } no_fences: sync_file_get_name(sync_file, info.name, sizeof(info.name)); info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) ret = -EFAULT; else ret = 0; out: kfree(fence_info); return ret; } static int sync_file_ioctl_set_deadline(struct sync_file *sync_file, unsigned long arg) { struct sync_set_deadline ts; if (copy_from_user(&ts, (void __user *)arg, sizeof(ts))) return -EFAULT; if (ts.pad) return -EINVAL; dma_fence_set_deadline(sync_file->fence, ns_to_ktime(ts.deadline_ns)); return 0; } static long sync_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct sync_file *sync_file = file->private_data; switch (cmd) { case SYNC_IOC_MERGE: return sync_file_ioctl_merge(sync_file, arg); case SYNC_IOC_FILE_INFO: return sync_file_ioctl_fence_info(sync_file, arg); case SYNC_IOC_SET_DEADLINE: return sync_file_ioctl_set_deadline(sync_file, arg); default: return -ENOTTY; } } static const struct file_operations sync_file_fops = { .release = sync_file_release, .poll = sync_file_poll, .unlocked_ioctl = sync_file_ioctl, .compat_ioctl = compat_ptr_ioctl, };
8 2 7 2 1 7 7 7 2 1 1 1 1 1 1 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 // SPDX-License-Identifier: GPL-2.0-only /* * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key * field in plain GRE. * * Documentation about PPTP can be found in RFC 2637 * * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) * * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/list.h> #include <linux/seq_file.h> #include <linux/in.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <linux/netfilter/nf_conntrack_pptp.h> static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_UNREPLIED] = 30*HZ, [GRE_CT_REPLIED] = 180*HZ, }; /* used when expectation is added */ static DEFINE_SPINLOCK(keymap_lock); static inline struct nf_gre_net *gre_pernet(struct net *net) { return &net->ct.nf_ct_proto.gre; } static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) { return km->tuple.src.l3num == t->src.l3num && !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) && !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) && km->tuple.dst.protonum == t->dst.protonum && km->tuple.dst.u.all == t->dst.u.all; } /* look up the source key for a given tuple */ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km; __be16 key = 0; list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); return key; } /* add a single keymap entry, associate with specified master ct */ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { struct net *net = nf_ct_net(ct); struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t) && km == *kmp) return 0; } pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; } km = kmalloc(sizeof(*km), GFP_ATOMIC); if (!km) return -ENOMEM; memcpy(&km->tuple, t, sizeof(*t)); *kmp = km; pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); spin_lock_bh(&keymap_lock); list_add_tail(&km->list, &net_gre->keymap_list); spin_unlock_bh(&keymap_lock); return 0; } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); spin_lock_bh(&keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", ct_pptp_info->keymap[dir]); list_del_rcu(&ct_pptp_info->keymap[dir]->list); kfree_rcu(ct_pptp_info->keymap[dir], rcu); ct_pptp_info->keymap[dir] = NULL; } } spin_unlock_bh(&keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ /* gre hdr info to tuple */ bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { const struct pptp_gre_header *pgrehdr; struct pptp_gre_header _pgrehdr; __be16 srckey; const struct gre_base_hdr *grehdr; struct gre_base_hdr _grehdr; /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; return true; } /* PPTP header is variable length, only need up to the call_id field */ pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); if (!pgrehdr) return true; if (grehdr->protocol != GRE_PROTO_PPP) { pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); return false; } tuple->dst.u.gre.key = pgrehdr->call_id; srckey = gre_keymap_lookup(net, tuple); tuple->src.u.gre.key = srckey; return true; } #ifdef CONFIG_NF_CONNTRACK_PROCFS /* print private data for conntrack */ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "timeout=%u, stream_timeout=%u ", (ct->proto.gre.timeout / HZ), (ct->proto.gre.stream_timeout / HZ)); } #endif static unsigned int *gre_get_timeouts(struct net *net) { return gre_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntrack */ int nf_conntrack_gre_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned long status; if (!nf_ct_is_confirmed(ct)) { unsigned int *timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = gre_get_timeouts(nf_ct_net(ct)); /* initialize to sane value. Ideally a conntrack helper * (e.g. in case of pptp) is increasing them */ ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; } status = READ_ONCE(ct->status); /* If we've seen traffic both ways, this is a GRE connection. * Extend timeout. */ if (status & IPS_SEEN_REPLY) { nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.stream_timeout); /* never set ASSURED for IPS_NAT_CLASH, they time out soon */ if (unlikely((status & IPS_NAT_CLASH))) return NF_ACCEPT; /* Also, more likely to be important, and not a probe. */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); return NF_ACCEPT; } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; struct nf_gre_net *net_gre = gre_pernet(net); if (!timeouts) timeouts = gre_get_timeouts(net); /* set default timeouts for GRE. */ timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED]; timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; } if (tb[CTA_TIMEOUT_GRE_REPLIED]) { timeouts[GRE_CT_REPLIED] = ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; } return 0; } static int gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED, htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) || nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED, htonl(timeouts[GRE_CT_REPLIED] / HZ))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_gre_init_net(struct net *net) { struct nf_gre_net *net_gre = gre_pernet(net); int i; INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) net_gre->timeouts[i] = gre_timeouts[i]; } /* protocol helper struct */ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, .allow_clash = true, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = gre_timeout_nlattr_to_obj, .obj_to_nlattr = gre_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_GRE_MAX, .obj_size = sizeof(unsigned int) * GRE_CT_MAX, .nla_policy = gre_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ };
2 1 1 6 5 1 6 6 6 5 5 5 5 1 5 6 6 6 6 6 4 3 1 25 11 16 6 4 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/root.c * * Copyright (C) 1991, 1992 Linus Torvalds * * proc root directory handling functions */ #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/stat.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> #include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/fs_parser.h> #include <linux/cred.h> #include <linux/magic.h> #include <linux/slab.h> #include "internal.h" struct proc_fs_context { struct pid_namespace *pid_ns; unsigned int mask; enum proc_hidepid hidepid; int gid; enum proc_pidonly pidonly; }; enum proc_param { Opt_gid, Opt_hidepid, Opt_subset, Opt_pidns, }; static const struct fs_parameter_spec proc_fs_parameters[] = { fsparam_u32("gid", Opt_gid), fsparam_string("hidepid", Opt_hidepid), fsparam_string("subset", Opt_subset), fsparam_file_or_string("pidns", Opt_pidns), {} }; static inline int valid_hidepid(unsigned int value) { return (value == HIDEPID_OFF || value == HIDEPID_NO_ACCESS || value == HIDEPID_INVISIBLE || value == HIDEPID_NOT_PTRACEABLE); } static int proc_parse_hidepid_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parameter_spec hidepid_u32_spec = fsparam_u32("hidepid", Opt_hidepid); struct fs_parse_result result; int base = (unsigned long)hidepid_u32_spec.data; if (param->type != fs_value_is_string) return invalf(fc, "proc: unexpected type of hidepid value\n"); if (!kstrtouint(param->string, base, &result.uint_32)) { if (!valid_hidepid(result.uint_32)) return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); ctx->hidepid = result.uint_32; return 0; } if (!strcmp(param->string, "off")) ctx->hidepid = HIDEPID_OFF; else if (!strcmp(param->string, "noaccess")) ctx->hidepid = HIDEPID_NO_ACCESS; else if (!strcmp(param->string, "invisible")) ctx->hidepid = HIDEPID_INVISIBLE; else if (!strcmp(param->string, "ptraceable")) ctx->hidepid = HIDEPID_NOT_PTRACEABLE; else return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); return 0; } static int proc_parse_subset_param(struct fs_context *fc, char *value) { struct proc_fs_context *ctx = fc->fs_private; while (value) { char *ptr = strchr(value, ','); if (ptr != NULL) *ptr++ = '\0'; if (*value != '\0') { if (!strcmp(value, "pid")) { ctx->pidonly = PROC_PIDONLY_ON; } else { return invalf(fc, "proc: unsupported subset option - %s\n", value); } } value = ptr; } return 0; } #ifdef CONFIG_PID_NS static int proc_parse_pidns_param(struct fs_context *fc, struct fs_parameter *param, struct fs_parse_result *result) { struct proc_fs_context *ctx = fc->fs_private; struct pid_namespace *target, *active = task_active_pid_ns(current); struct ns_common *ns; struct file *ns_filp __free(fput) = NULL; switch (param->type) { case fs_value_is_file: /* came through fsconfig, steal the file reference */ ns_filp = no_free_ptr(param->file); break; case fs_value_is_string: ns_filp = filp_open(param->string, O_RDONLY, 0); break; default: WARN_ON_ONCE(true); break; } if (!ns_filp) ns_filp = ERR_PTR(-EBADF); if (IS_ERR(ns_filp)) { errorfc(fc, "could not get file from pidns argument"); return PTR_ERR(ns_filp); } if (!proc_ns_file(ns_filp)) return invalfc(fc, "pidns argument is not an nsfs file"); ns = get_proc_ns(file_inode(ns_filp)); if (ns->ns_type != CLONE_NEWPID) return invalfc(fc, "pidns argument is not a pidns file"); target = container_of(ns, struct pid_namespace, ns); /* * pidns= is shorthand for joining the pidns to get a fsopen fd, so the * permission model should be the same as pidns_install(). */ if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) { errorfc(fc, "insufficient permissions to set pidns"); return -EPERM; } if (!pidns_is_ancestor(target, active)) return invalfc(fc, "cannot set pidns to non-descendant pidns"); put_pid_ns(ctx->pid_ns); ctx->pid_ns = get_pid_ns(target); put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); return 0; } #endif /* CONFIG_PID_NS */ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt, err; opt = fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_gid: ctx->gid = result.uint_32; break; case Opt_hidepid: err = proc_parse_hidepid_param(fc, param); if (err) return err; break; case Opt_subset: err = proc_parse_subset_param(fc, param->string); if (err) return err; break; case Opt_pidns: #ifdef CONFIG_PID_NS /* * We would have to RCU-protect every proc_pid_ns() or * proc_sb_info() access if we allowed this to be reconfigured * for an existing procfs instance. Luckily, procfs instances * are cheap to create, and mount-beneath would let you * atomically replace an instance even with overmounts. */ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { errorfc(fc, "cannot reconfigure pidns for existing procfs"); return -EBUSY; } err = proc_parse_pidns_param(fc, param, &result); if (err) return err; break; #else errorfc(fc, "pidns mount flag not supported on this system"); return -EOPNOTSUPP; #endif default: return -EINVAL; } ctx->mask |= 1 << opt; return 0; } static void proc_apply_options(struct proc_fs_info *fs_info, struct fs_context *fc, struct user_namespace *user_ns) { struct proc_fs_context *ctx = fc->fs_private; if (ctx->mask & (1 << Opt_gid)) fs_info->pid_gid = make_kgid(user_ns, ctx->gid); if (ctx->mask & (1 << Opt_hidepid)) fs_info->hide_pid = ctx->hidepid; if (ctx->mask & (1 << Opt_subset)) fs_info->pidonly = ctx->pidonly; if (ctx->mask & (1 << Opt_pidns) && !WARN_ON_ONCE(fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)) { put_pid_ns(fs_info->pid_ns); fs_info->pid_ns = get_pid_ns(ctx->pid_ns); } } static int proc_fill_super(struct super_block *s, struct fs_context *fc) { struct proc_fs_context *ctx = fc->fs_private; struct inode *root_inode; struct proc_fs_info *fs_info; int ret; fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL); if (!fs_info) return -ENOMEM; fs_info->pid_ns = get_pid_ns(ctx->pid_ns); proc_apply_options(fs_info, fc, current_user_ns()); /* User space would break if executables or devices appear on proc */ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; s->s_op = &proc_sops; s->s_time_gran = 1; s->s_fs_info = fs_info; /* * procfs isn't actually a stacking filesystem; however, there is * too much magic going on inside it to permit stacking things on * top of it */ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; /* procfs dentries and inodes don't require IO to create */ s->s_shrink->seeks = 0; pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); if (!root_inode) { pr_err("proc_fill_super: get root inode failed\n"); return -ENOMEM; } s->s_root = d_make_root(root_inode); if (!s->s_root) { pr_err("proc_fill_super: allocate dentry failed\n"); return -ENOMEM; } ret = proc_setup_self(s); if (ret) { return ret; } return proc_setup_thread_self(s); } static int proc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct proc_fs_info *fs_info = proc_sb_info(sb); sync_filesystem(sb); proc_apply_options(fs_info, fc, current_user_ns()); return 0; } static int proc_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, proc_fill_super); } static void proc_fs_context_free(struct fs_context *fc) { struct proc_fs_context *ctx = fc->fs_private; put_pid_ns(ctx->pid_ns); kfree(ctx); } static const struct fs_context_operations proc_fs_context_ops = { .free = proc_fs_context_free, .parse_param = proc_parse_param, .get_tree = proc_get_tree, .reconfigure = proc_reconfigure, }; static int proc_init_fs_context(struct fs_context *fc) { struct proc_fs_context *ctx; ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); fc->fs_private = ctx; fc->ops = &proc_fs_context_ops; return 0; } static void proc_kill_sb(struct super_block *sb) { struct proc_fs_info *fs_info = proc_sb_info(sb); if (!fs_info) { kill_anon_super(sb); return; } dput(fs_info->proc_self); dput(fs_info->proc_thread_self); kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); kfree_rcu(fs_info, rcu); } static struct file_system_type proc_fs_type = { .name = "proc", .init_fs_context = proc_init_fs_context, .parameters = proc_fs_parameters, .kill_sb = proc_kill_sb, .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) { proc_init_kmemcache(); set_proc_pid_nlink(); proc_self_init(); proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); proc_sys_init(); /* * Last things last. It is not like userspace processes eager * to open /proc files exist at this point but register last * anyway. */ register_filesystem(&proc_fs_type); } static int proc_root_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { if (!proc_pid_lookup(dentry, flags)) return NULL; return proc_lookup(dir, dentry, flags); } static int proc_root_readdir(struct file *file, struct dir_context *ctx) { if (ctx->pos < FIRST_PROCESS_ENTRY) { int error = proc_readdir(file, ctx); if (unlikely(error <= 0)) return error; ctx->pos = FIRST_PROCESS_ENTRY; } return proc_pid_readdir(file, ctx); } /* * The root /proc directory is special, as it has the * <pid> directories. Thus we don't use the generic * directory handling functions for that.. */ static const struct file_operations proc_root_operations = { .read = generic_read_dir, .iterate_shared = proc_root_readdir, .llseek = generic_file_llseek, }; /* * proc root can do almost nothing.. */ static const struct inode_operations proc_root_inode_operations = { .lookup = proc_root_lookup, .getattr = proc_root_getattr, }; /* * This is the root "inode" in the /proc tree.. */ struct proc_dir_entry proc_root = { .low_ino = PROCFS_ROOT_INO, .namelen = 5, .mode = S_IFDIR | S_IRUGO | S_IXUGO, .nlink = 2, .refcnt = REFCOUNT_INIT(1), .proc_iops = &proc_root_inode_operations, .proc_dir_ops = &proc_root_operations, .parent = &proc_root, .subdir = RB_ROOT, .name = "/proc", };
1 18 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM nfs #if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NFS_H #include <linux/tracepoint.h> #include <linux/iversion.h> #include <trace/misc/fs.h> #include <trace/misc/nfs.h> #include <trace/misc/sunrpc.h> #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ { NFS_INO_INVALID_MODE, "INVALID_MODE" }, \ { NFS_INO_INVALID_BTIME, "INVALID_BTIME" }) #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ { BIT(NFS_INO_STALE), "STALE" }, \ { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ { BIT(NFS_INO_ODIRECT), "ODIRECT" }) #define nfs_show_wb_flags(v) \ __print_flags(v, "|", \ { BIT(PG_BUSY), "BUSY" }, \ { BIT(PG_MAPPED), "MAPPED" }, \ { BIT(PG_FOLIO), "FOLIO" }, \ { BIT(PG_CLEAN), "CLEAN" }, \ { BIT(PG_COMMIT_TO_DS), "COMMIT_TO_DS" }, \ { BIT(PG_INODE_REF), "INODE_REF" }, \ { BIT(PG_HEADLOCK), "HEADLOCK" }, \ { BIT(PG_TEARDOWN), "TEARDOWN" }, \ { BIT(PG_UNLOCKPAGE), "UNLOCKPAGE" }, \ { BIT(PG_UPTODATE), "UPTODATE" }, \ { BIT(PG_WB_END), "WB_END" }, \ { BIT(PG_REMOVE), "REMOVE" }, \ { BIT(PG_CONTENDED1), "CONTENDED1" }, \ { BIT(PG_CONTENDED2), "CONTENDED2" }) DECLARE_EVENT_CLASS(nfs_inode_event, TP_PROTO( const struct inode *inode ), TP_ARGS(inode), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(unsigned long, cache_validity) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->cache_validity = nfsi->cache_validity; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cache_validity=0x%lx (%s)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (unsigned long long)__entry->version, __entry->cache_validity, nfs_show_cache_validity(__entry->cache_validity) ) ); DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_PROTO( const struct inode *inode, int error ), TP_ARGS(inode, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) __field(u64, fileid) __field(u64, version) __field(loff_t, size) __field(unsigned long, nfsi_flags) __field(unsigned long, cache_validity) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, nfs_show_cache_validity(__entry->cache_validity), __entry->nfsi_flags, nfs_show_nfsi_flags(__entry->nfsi_flags) ) ); #define DEFINE_NFS_INODE_EVENT(name) \ DEFINE_EVENT(nfs_inode_event, name, \ TP_PROTO( \ const struct inode *inode \ ), \ TP_ARGS(inode)) #define DEFINE_NFS_INODE_EVENT_DONE(name) \ DEFINE_EVENT(nfs_inode_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ int error \ ), \ TP_ARGS(inode, error)) DEFINE_NFS_INODE_EVENT(nfs_set_inode_stale); DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit); DEFINE_NFS_INODE_EVENT(nfs_getattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit); DEFINE_NFS_INODE_EVENT(nfs_setattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit); DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid); DEFINE_NFS_INODE_EVENT(nfs_readdir_force_readdirplus); DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_cache_fill_done); DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_uncached_done); TRACE_EVENT(nfs_access_exit, TP_PROTO( const struct inode *inode, unsigned int mask, unsigned int permitted, int error ), TP_ARGS(inode, mask, permitted, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) __field(u64, fileid) __field(u64, version) __field(loff_t, size) __field(unsigned long, nfsi_flags) __field(unsigned long, cache_validity) __field(unsigned int, mask) __field(unsigned int, permitted) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; __entry->mask = mask; __entry->permitted = permitted; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " "mask=0x%x permitted=0x%x", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, nfs_show_cache_validity(__entry->cache_validity), __entry->nfsi_flags, nfs_show_nfsi_flags(__entry->nfsi_flags), __entry->mask, __entry->permitted ) ); DECLARE_EVENT_CLASS(nfs_update_size_class, TP_PROTO( const struct inode *inode, loff_t new_size ), TP_ARGS(inode, new_size), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, cur_size) __field(loff_t, new_size) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->fileid = nfsi->fileid; __entry->version = inode_peek_iversion_raw(inode); __entry->cur_size = i_size_read(inode); __entry->new_size = new_size; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cursize=%lld newsize=%lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->cur_size, __entry->new_size ) ); #define DEFINE_NFS_UPDATE_SIZE_EVENT(name) \ DEFINE_EVENT(nfs_update_size_class, nfs_size_##name, \ TP_PROTO( \ const struct inode *inode, \ loff_t new_size \ ), \ TP_ARGS(inode, new_size)) DEFINE_NFS_UPDATE_SIZE_EVENT(truncate); DEFINE_NFS_UPDATE_SIZE_EVENT(truncate_folio); DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); DEFINE_NFS_UPDATE_SIZE_EVENT(update); DEFINE_NFS_UPDATE_SIZE_EVENT(grow); DECLARE_EVENT_CLASS(nfs_inode_range_event, TP_PROTO( const struct inode *inode, loff_t range_start, loff_t range_end ), TP_ARGS(inode, range_start, range_end), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, range_start) __field(loff_t, range_end) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->fileid = nfsi->fileid; __entry->version = inode_peek_iversion_raw(inode); __entry->range_start = range_start; __entry->range_end = range_end; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "range=[%lld, %lld]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->range_start, __entry->range_end ) ); #define DEFINE_NFS_INODE_RANGE_EVENT(name) \ DEFINE_EVENT(nfs_inode_range_event, name, \ TP_PROTO( \ const struct inode *inode, \ loff_t range_start, \ loff_t range_end \ ), \ TP_ARGS(inode, range_start, range_end)) DEFINE_NFS_INODE_RANGE_EVENT(nfs_readdir_invalidate_cache_range); DECLARE_EVENT_CLASS(nfs_readdir_event, TP_PROTO( const struct file *file, const __be32 *verifier, u64 cookie, pgoff_t page_index, unsigned int dtsize ), TP_ARGS(file, verifier, cookie, page_index, dtsize), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __array(char, verifier, NFS4_VERIFIER_SIZE) __field(u64, cookie) __field(pgoff_t, index) __field(unsigned int, dtsize) ), TP_fast_assign( const struct inode *dir = file_inode(file); const struct nfs_inode *nfsi = NFS_I(dir); __entry->dev = dir->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(dir); if (cookie != 0) memcpy(__entry->verifier, verifier, NFS4_VERIFIER_SIZE); else memset(__entry->verifier, 0, NFS4_VERIFIER_SIZE); __entry->cookie = cookie; __entry->index = page_index; __entry->dtsize = dtsize; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "cookie=%s:0x%llx cache_index=%lu dtsize=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, show_nfs4_verifier(__entry->verifier), (unsigned long long)__entry->cookie, __entry->index, __entry->dtsize ) ); #define DEFINE_NFS_READDIR_EVENT(name) \ DEFINE_EVENT(nfs_readdir_event, name, \ TP_PROTO( \ const struct file *file, \ const __be32 *verifier, \ u64 cookie, \ pgoff_t page_index, \ unsigned int dtsize \ ), \ TP_ARGS(file, verifier, cookie, page_index, dtsize)) DEFINE_NFS_READDIR_EVENT(nfs_readdir_cache_fill); DEFINE_NFS_READDIR_EVENT(nfs_readdir_uncached); DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags ), TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __field(u64, fileid) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name); ), TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name), __entry->fileid ) ); #define DEFINE_NFS_LOOKUP_EVENT(name) \ DEFINE_EVENT(nfs_lookup_event, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ unsigned int flags \ ), \ TP_ARGS(dir, dentry, flags)) DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags, int error ), TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __field(u64, fileid) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name), __entry->fileid ) ); #define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \ DEFINE_EVENT(nfs_lookup_event_done, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ unsigned int flags, \ int error \ ), \ TP_ARGS(dir, dentry, flags, error)) DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup); DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup_revalidate_failed); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_readdir_lookup_revalidate); TRACE_EVENT(nfs_atomic_open_enter, TP_PROTO( const struct inode *dir, const struct nfs_open_context *ctx, unsigned int flags ), TP_ARGS(dir, ctx, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name); ), TP_printk( "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_atomic_open_exit, TP_PROTO( const struct inode *dir, const struct nfs_open_context *ctx, unsigned int flags, int error ), TP_ARGS(dir, ctx, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) ), TP_fast_assign( __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_create_enter, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags ), TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __assign_str(name); ), TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_create_exit, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags, int error ), TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __assign_str(name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_directory_event, TP_PROTO( const struct inode *dir, const struct dentry *dentry ), TP_ARGS(dir, dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __assign_str(name); ), TP_printk( "name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); #define DEFINE_NFS_DIRECTORY_EVENT(name) \ DEFINE_EVENT(nfs_directory_event, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry \ ), \ TP_ARGS(dir, dentry)) DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_PROTO( const struct inode *dir, const struct dentry *dentry, int error ), TP_ARGS(dir, dentry, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __assign_str(name); ), TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); #define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \ DEFINE_EVENT(nfs_directory_event_done, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ int error \ ), \ TP_ARGS(dir, dentry, error)) DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); TRACE_EVENT(nfs_link_enter, TP_PROTO( const struct inode *inode, const struct inode *dir, const struct dentry *dentry ), TP_ARGS(inode, dir, dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __assign_str(name); ), TP_printk( "fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_link_exit, TP_PROTO( const struct inode *inode, const struct inode *dir, const struct dentry *dentry, int error ), TP_ARGS(inode, dir, dentry, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __assign_str(name); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_rename_event, TP_PROTO( const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, const struct dentry *new_dentry ), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, old_dir) __field(u64, new_dir) __string(old_name, old_dentry->d_name.name) __string(new_name, new_dentry->d_name.name) ), TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); __assign_str(old_name); __assign_str(new_name); ), TP_printk( "old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->new_dir, __get_str(new_name) ) ); #define DEFINE_NFS_RENAME_EVENT(name) \ DEFINE_EVENT(nfs_rename_event, name, \ TP_PROTO( \ const struct inode *old_dir, \ const struct dentry *old_dentry, \ const struct inode *new_dir, \ const struct dentry *new_dentry \ ), \ TP_ARGS(old_dir, old_dentry, new_dir, new_dentry)) DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_PROTO( const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, const struct dentry *new_dentry, int error ), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, error) __field(u64, old_dir) __string(old_name, old_dentry->d_name.name) __field(u64, new_dir) __string(new_name, new_dentry->d_name.name) ), TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); __assign_str(old_name); __assign_str(new_name); ), TP_printk( "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->new_dir, __get_str(new_name) ) ); #define DEFINE_NFS_RENAME_EVENT_DONE(name) \ DEFINE_EVENT(nfs_rename_event_done, name, \ TP_PROTO( \ const struct inode *old_dir, \ const struct dentry *old_dentry, \ const struct inode *new_dir, \ const struct dentry *new_dentry, \ int error \ ), \ TP_ARGS(old_dir, old_dentry, new_dir, \ new_dentry, error)) DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done); TRACE_EVENT(nfs_sillyrename_unlink, TP_PROTO( const struct nfs_unlinkdata *data, int error ), TP_ARGS(data, error), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, error) __field(u64, dir) __dynamic_array(char, name, data->args.name.len + 1) ), TP_fast_assign( struct inode *dir = d_inode(data->dentry->d_parent); size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = -error; memcpy(__get_str(name), data->args.name.name, len); __get_str(name)[len] = 0; ), TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, loff_t offset, size_t count ), TP_ARGS(inode, offset, count), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(size_t, count) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = offset; __entry->count = count; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "offset=%lld count=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->count ) ); #define DEFINE_NFS_FOLIO_EVENT(name) \ DEFINE_EVENT(nfs_folio_event, name, \ TP_PROTO( \ const struct inode *inode, \ loff_t offset, \ size_t count \ ), \ TP_ARGS(inode, offset, count)) DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, loff_t offset, size_t count, int ret ), TP_ARGS(inode, offset, count, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(int, ret) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(size_t, count) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = offset; __entry->count = count; __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "offset=%lld count=%zu ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->count, __entry->ret ) ); #define DEFINE_NFS_FOLIO_EVENT_DONE(name) \ DEFINE_EVENT(nfs_folio_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ loff_t offset, \ size_t count, \ int ret \ ), \ TP_ARGS(inode, offset, count, ret)) DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done); DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done); DEFINE_NFS_FOLIO_EVENT(nfs_try_to_update_request); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_try_to_update_request_done); DEFINE_NFS_FOLIO_EVENT(nfs_update_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_update_folio_done); DEFINE_NFS_FOLIO_EVENT(nfs_write_begin); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_begin_done); DEFINE_NFS_FOLIO_EVENT(nfs_write_end); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_end_done); DEFINE_NFS_FOLIO_EVENT(nfs_writepages); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writepages_done); DECLARE_EVENT_CLASS(nfs_kiocb_event, TP_PROTO( const struct kiocb *iocb, const struct iov_iter *iter ), TP_ARGS(iocb, iter), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(size_t, count) __field(int, flags) ), TP_fast_assign( const struct inode *inode = file_inode(iocb->ki_filp); const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = iocb->ki_pos; __entry->count = iov_iter_count(iter); __entry->flags = iocb->ki_flags; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld count=%zu ki_flags=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->count, __print_flags(__entry->flags, "|", TRACE_IOCB_STRINGS) ) ); #define DEFINE_NFS_KIOCB_EVENT(name) \ DEFINE_EVENT(nfs_kiocb_event, name, \ TP_PROTO( \ const struct kiocb *iocb, \ const struct iov_iter *iter \ ), \ TP_ARGS(iocb, iter)) DEFINE_NFS_KIOCB_EVENT(nfs_file_read); DEFINE_NFS_KIOCB_EVENT(nfs_file_write); TRACE_EVENT(nfs_aop_readahead, TP_PROTO( const struct inode *inode, loff_t pos, unsigned int nr_pages ), TP_ARGS(inode, pos, nr_pages), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(unsigned int, nr_pages) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = pos; __entry->nr_pages = nr_pages; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld nr_pages=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->nr_pages ) ); TRACE_EVENT(nfs_aop_readahead_done, TP_PROTO( const struct inode *inode, unsigned int nr_pages, int ret ), TP_ARGS(inode, nr_pages, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(int, ret) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(unsigned int, nr_pages) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->nr_pages = nr_pages; __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->nr_pages, __entry->ret ) ); TRACE_EVENT(nfs_initiate_read, TP_PROTO( const struct nfs_pgio_header *hdr ), TP_ARGS(hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_readpage_done, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->eof ? " eof" : "" ) ); TRACE_EVENT(nfs_readpage_short, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->eof ? " eof" : "" ) ); TRACE_EVENT(nfs_pgio_error, TP_PROTO( const struct nfs_pgio_header *hdr, int error, loff_t pos ), TP_ARGS(hdr, error, pos), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(loff_t, pos) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = error; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk("error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u pos=%llu", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->pos ) ); TRACE_EVENT(nfs_initiate_write, TP_PROTO( const struct nfs_pgio_header *hdr ), TP_ARGS(hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) __field(unsigned long, stable) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; __entry->stable = hdr->args.stable; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u stable=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count, show_nfs_stable_how(__entry->stable) ) ); TRACE_EVENT(nfs_writeback_done, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = hdr->res.verf; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->stable = verf->committed; memcpy(__entry->verifier, &verf->verifier, NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u stable=%s " "verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) ); DECLARE_EVENT_CLASS(nfs_page_class, TP_PROTO( const struct nfs_page *req ), TP_ARGS(req), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(const struct nfs_page *__private, req) __field(loff_t, offset) __field(unsigned int, count) __field(unsigned long, flags) ), TP_fast_assign( const struct inode *inode = folio_inode(req->wb_folio); const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->req = req; __entry->offset = req_offset(req); __entry->count = req->wb_bytes; __entry->flags = req->wb_flags; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x req=%p offset=%lld count=%u flags=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->req, __entry->offset, __entry->count, nfs_show_wb_flags(__entry->flags) ) ); #define DEFINE_NFS_PAGE_EVENT(name) \ DEFINE_EVENT(nfs_page_class, name, \ TP_PROTO( \ const struct nfs_page *req \ ), \ TP_ARGS(req)) DEFINE_NFS_PAGE_EVENT(nfs_writepage_setup); DEFINE_NFS_PAGE_EVENT(nfs_do_writepage); DECLARE_EVENT_CLASS(nfs_page_error_class, TP_PROTO( const struct inode *inode, const struct nfs_page *req, int error ), TP_ARGS(inode, req, error), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(unsigned int, count) __field(int, error) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->offset = req_offset(req); __entry->count = req->wb_bytes; __entry->error = error; ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count ) ); #define DEFINE_NFS_PAGEERR_EVENT(name) \ DEFINE_EVENT(nfs_page_error_class, name, \ TP_PROTO( \ const struct inode *inode, \ const struct nfs_page *req, \ int error \ ), \ TP_ARGS(inode, req, error)) DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error); TRACE_EVENT(nfs_initiate_commit, TP_PROTO( const struct nfs_commit_data *data ), TP_ARGS(data), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = data->args.fh ? data->args.fh : &nfsi->fh; __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_commit_done, TP_PROTO( const struct rpc_task *task, const struct nfs_commit_data *data ), TP_ARGS(task, data), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = data->args.fh ? data->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = data->res.verf; __entry->error = task->tk_status; __entry->offset = data->args.offset; __entry->stable = verf->committed; memcpy(__entry->verifier, &verf->verifier, NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld stable=%s verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) ); #define nfs_show_direct_req_flags(v) \ __print_flags(v, "|", \ { NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \ { NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \ { NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \ { NFS_ODIRECT_DONE, "DONE" } ) DECLARE_EVENT_CLASS(nfs_direct_req_class, TP_PROTO( const struct nfs_direct_req *dreq ), TP_ARGS(dreq), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, fileid) __field(u32, fhandle) __field(loff_t, offset) __field(ssize_t, count) __field(ssize_t, error) __field(int, flags) ), TP_fast_assign( const struct inode *inode = dreq->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = &nfsi->fh; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = dreq->io_start; __entry->count = dreq->count; __entry->error = dreq->error; __entry->flags = dreq->flags; ), TP_printk( "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zd flags=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count, nfs_show_direct_req_flags(__entry->flags) ) ); #define DEFINE_NFS_DIRECT_REQ_EVENT(name) \ DEFINE_EVENT(nfs_direct_req_class, name, \ TP_PROTO( \ const struct nfs_direct_req *dreq \ ), \ TP_ARGS(dreq)) DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io); TRACE_EVENT(nfs_fh_to_dentry, TP_PROTO( const struct super_block *sb, const struct nfs_fh *fh, u64 fileid, int error ), TP_ARGS(sb, fh, fileid, error), TP_STRUCT__entry( __field(int, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) ), TP_fast_assign( __entry->error = error; __entry->dev = sb->s_dev; __entry->fileid = fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle ) ); TRACE_EVENT(nfs_mount_assign, TP_PROTO( const char *option, const char *value ), TP_ARGS(option, value), TP_STRUCT__entry( __string(option, option) __string(value, value) ), TP_fast_assign( __assign_str(option); __assign_str(value); ), TP_printk("option %s=%s", __get_str(option), __get_str(value) ) ); TRACE_EVENT(nfs_mount_option, TP_PROTO( const struct fs_parameter *param ), TP_ARGS(param), TP_STRUCT__entry( __string(option, param->key) ), TP_fast_assign( __assign_str(option); ), TP_printk("option %s", __get_str(option)) ); TRACE_EVENT(nfs_mount_path, TP_PROTO( const char *path ), TP_ARGS(path), TP_STRUCT__entry( __string(path, path) ), TP_fast_assign( __assign_str(path); ), TP_printk("path='%s'", __get_str(path)) ); TRACE_EVENT(nfs_local_open_fh, TP_PROTO( const struct nfs_fh *fh, fmode_t fmode, int error ), TP_ARGS(fh, fmode, error), TP_STRUCT__entry( __field(int, error) __field(u32, fhandle) __field(unsigned int, fmode) ), TP_fast_assign( __entry->error = error; __entry->fhandle = nfs_fhandle_hash(fh); __entry->fmode = (__force unsigned int)fmode; ), TP_printk( "error=%d fhandle=0x%08x mode=%s", __entry->error, __entry->fhandle, show_fs_fmode_flags(__entry->fmode) ) ); DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, int error ), TP_ARGS(xdr, error), TP_STRUCT__entry( __field(unsigned int, task_id) __field(unsigned int, client_id) __field(u32, xid) __field(int, version) __field(unsigned long, error) __string(program, xdr->rqst->rq_task->tk_client->cl_program->name) __string(procedure, xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) ), TP_fast_assign( const struct rpc_rqst *rqstp = xdr->rqst; const struct rpc_task *task = rqstp->rq_task; __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; __entry->error = error; __assign_str(program); __assign_str(procedure); ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, show_nfs_status(__entry->error) ) ); #define DEFINE_NFS_XDR_EVENT(name) \ DEFINE_EVENT(nfs_xdr_event, name, \ TP_PROTO( \ const struct xdr_stream *xdr, \ int error \ ), \ TP_ARGS(xdr, error)) DEFINE_NFS_XDR_EVENT(nfs_xdr_status); DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle); #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE nfstrace /* This part must be outside protection */ #include <trace/define_trace.h>
543 133 10 7 1907 18 912 82 664 50 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 /* SPDX-License-Identifier: GPL-2.0 */ /* * NUMA memory policies for Linux. * Copyright 2003,2004 Andi Kleen SuSE Labs */ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 #include <linux/sched.h> #include <linux/mmzone.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/node.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <uapi/linux/mempolicy.h> struct mm_struct; #define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ #ifdef CONFIG_NUMA /* * Describe a memory policy. * * A mempolicy can be either associated with a process or with a VMA. * For VMA related allocations the VMA policy is preferred, otherwise * the process policy is used. Interrupts ignore the memory policy * of the current process. * * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. * * Freeing policy: * Mempolicy objects are reference counted. A mempolicy will be freed when * mpol_put() decrements the reference count to zero. * * Duplicating policy objects: * mpol_dup() allocates a new mempolicy and copies the specified mempolicy * to the new storage. The reference count of the new object is initialized * to 1, representing the caller of mpol_dup(). */ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/preferred/etc */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; }; /* * Support for managing mempolicy data objects (clone, copy, destroy) * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. */ extern void __mpol_put(struct mempolicy *pol); static inline void mpol_put(struct mempolicy *pol) { if (pol) __mpol_put(pol); } /* * Does mempolicy pol need explicit unref after use? * Currently only needed for shared policies. */ static inline int mpol_needs_cond_ref(struct mempolicy *pol) { return (pol && (pol->flags & MPOL_F_SHARED)); } static inline void mpol_cond_put(struct mempolicy *pol) { if (mpol_needs_cond_ref(pol)) __mpol_put(pol); } extern struct mempolicy *__mpol_dup(struct mempolicy *pol); static inline struct mempolicy *mpol_dup(struct mempolicy *pol) { if (pol) pol = __mpol_dup(pol); return pol; } static inline void mpol_get(struct mempolicy *pol) { if (pol) atomic_inc(&pol->refcnt); } extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b); static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (a == b) return true; return __mpol_equal(a, b); } /* * Tree of shared policies for a shared memory region. */ struct shared_policy { struct rb_root root; rwlock_t lock; }; struct sp_node { struct rb_node nd; pgoff_t start, end; struct mempolicy *policy; }; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *mpol); void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags); #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long addr); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); } extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); extern int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords); #else struct mempolicy {}; static inline struct mempolicy *get_task_policy(struct task_struct *p) { return NULL; } static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } static inline void mpol_put(struct mempolicy *pol) { } static inline void mpol_cond_put(struct mempolicy *pol) { } static inline void mpol_get(struct mempolicy *pol) { } struct shared_policy {}; static inline void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { } static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { *ilx = 0; return NULL; } static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } static inline void numa_policy_init(void) { } static inline void numa_default_policy(void) { } static inline void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { } static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; } static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return 0; } static inline void check_highest_zone(int k) { } #ifdef CONFIG_TMPFS static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif static inline int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long address) { return -1; /* no node preference */ } static inline void mpol_put_task_policy(struct task_struct *task) { } static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; } #endif /* CONFIG_NUMA */ #endif
11 1 10 14 1 1 1 11 2 1 9 1 21 21 38 2 36 79 83 23 2 82 83 83 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 // SPDX-License-Identifier: GPL-2.0-or-later // // Validation of USB-audio class descriptors // #include <linux/init.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <linux/usb/midi.h> #include "usbaudio.h" #include "helper.h" struct usb_desc_validator { unsigned char protocol; unsigned char type; bool (*func)(const void *p, const struct usb_desc_validator *v); size_t size; }; #define UAC_VERSION_ALL (unsigned char)(-1) /* UAC1 only */ static bool validate_uac1_header(const void *p, const struct usb_desc_validator *v) { const struct uac1_ac_header_descriptor *d = p; return d->bLength >= sizeof(*d) && d->bLength >= sizeof(*d) + d->bInCollection; } /* for mixer unit; covering all UACs */ static bool validate_mixer_unit(const void *p, const struct usb_desc_validator *v) { const struct uac_mixer_unit_descriptor *d = p; size_t len; if (d->bLength < sizeof(*d) || !d->bNrInPins) return false; len = sizeof(*d) + d->bNrInPins; /* We can't determine the bitmap size only from this unit descriptor, * so just check with the remaining length. * The actual bitmap is checked at mixer unit parser. */ switch (v->protocol) { case UAC_VERSION_1: default: len += 2 + 1; /* wChannelConfig, iChannelNames */ /* bmControls[n*m] */ len += 1; /* iMixer */ break; case UAC_VERSION_2: len += 4 + 1; /* bmChannelConfig, iChannelNames */ /* bmMixerControls[n*m] */ len += 1 + 1; /* bmControls, iMixer */ break; case UAC_VERSION_3: len += 2; /* wClusterDescrID */ /* bmMixerControls[n*m] */ break; } return d->bLength >= len; } /* both for processing and extension units; covering all UACs */ static bool validate_processing_unit(const void *p, const struct usb_desc_validator *v) { const struct uac_processing_unit_descriptor *d = p; const unsigned char *hdr = p; size_t len, m; if (d->bLength < sizeof(*d)) return false; len = sizeof(*d) + d->bNrInPins; if (d->bLength < len) return false; switch (v->protocol) { case UAC_VERSION_1: default: /* bNrChannels, wChannelConfig, iChannelNames */ len += 1 + 2 + 1; if (d->bLength < len + 1) /* bControlSize */ return false; m = hdr[len]; len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */ break; case UAC_VERSION_2: /* bNrChannels, bmChannelConfig, iChannelNames */ len += 1 + 4 + 1; if (v->type == UAC2_PROCESSING_UNIT_V2) len += 2; /* bmControls -- 2 bytes for PU */ else len += 1; /* bmControls -- 1 byte for EU */ len += 1; /* iProcessing */ break; case UAC_VERSION_3: /* wProcessingDescrStr, bmControls */ len += 2 + 4; break; } if (d->bLength < len) return false; switch (v->protocol) { case UAC_VERSION_1: default: if (v->type == UAC1_EXTENSION_UNIT) return true; /* OK */ switch (le16_to_cpu(d->wProcessType)) { case UAC_PROCESS_UP_DOWNMIX: case UAC_PROCESS_DOLBY_PROLOGIC: if (d->bLength < len + 1) /* bNrModes */ return false; m = hdr[len]; len += 1 + m * 2; /* bNrModes, waModes(n) */ break; default: break; } break; case UAC_VERSION_2: if (v->type == UAC2_EXTENSION_UNIT_V2) return true; /* OK */ switch (le16_to_cpu(d->wProcessType)) { case UAC2_PROCESS_UP_DOWNMIX: case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */ if (d->bLength < len + 1) /* bNrModes */ return false; m = hdr[len]; len += 1 + m * 4; /* bNrModes, daModes(n) */ break; default: break; } break; case UAC_VERSION_3: if (v->type == UAC3_EXTENSION_UNIT) { len += 2; /* wClusterDescrID */ break; } switch (le16_to_cpu(d->wProcessType)) { case UAC3_PROCESS_UP_DOWNMIX: if (d->bLength < len + 1) /* bNrModes */ return false; m = hdr[len]; len += 1 + m * 2; /* bNrModes, waClusterDescrID(n) */ break; case UAC3_PROCESS_MULTI_FUNCTION: len += 2 + 4; /* wClusterDescrID, bmAlgorighms */ break; default: break; } break; } if (d->bLength < len) return false; return true; } /* both for selector and clock selector units; covering all UACs */ static bool validate_selector_unit(const void *p, const struct usb_desc_validator *v) { const struct uac_selector_unit_descriptor *d = p; size_t len; if (d->bLength < sizeof(*d)) return false; len = sizeof(*d) + d->bNrInPins; switch (v->protocol) { case UAC_VERSION_1: default: len += 1; /* iSelector */ break; case UAC_VERSION_2: len += 1 + 1; /* bmControls, iSelector */ break; case UAC_VERSION_3: len += 4 + 2; /* bmControls, wSelectorDescrStr */ break; } return d->bLength >= len; } static bool validate_uac1_feature_unit(const void *p, const struct usb_desc_validator *v) { const struct uac_feature_unit_descriptor *d = p; if (d->bLength < sizeof(*d) || !d->bControlSize) return false; /* at least bmaControls(0) for master channel + iFeature */ return d->bLength >= sizeof(*d) + d->bControlSize + 1; } static bool validate_uac2_feature_unit(const void *p, const struct usb_desc_validator *v) { const struct uac2_feature_unit_descriptor *d = p; if (d->bLength < sizeof(*d)) return false; /* at least bmaControls(0) for master channel + iFeature */ return d->bLength >= sizeof(*d) + 4 + 1; } static bool validate_uac3_feature_unit(const void *p, const struct usb_desc_validator *v) { const struct uac3_feature_unit_descriptor *d = p; if (d->bLength < sizeof(*d)) return false; /* at least bmaControls(0) for master channel + wFeatureDescrStr */ return d->bLength >= sizeof(*d) + 4 + 2; } static bool validate_uac3_power_domain_unit(const void *p, const struct usb_desc_validator *v) { const struct uac3_power_domain_descriptor *d = p; if (d->bLength < sizeof(*d)) return false; /* baEntities[] + wPDomainDescrStr */ return d->bLength >= sizeof(*d) + d->bNrEntities + 2; } static bool validate_midi_out_jack(const void *p, const struct usb_desc_validator *v) { const struct usb_midi_out_jack_descriptor *d = p; return d->bLength >= sizeof(*d) && d->bLength >= sizeof(*d) + d->bNrInputPins * 2; } #define FIXED(p, t, s) { .protocol = (p), .type = (t), .size = sizeof(s) } #define FUNC(p, t, f) { .protocol = (p), .type = (t), .func = (f) } static const struct usb_desc_validator audio_validators[] = { /* UAC1 */ FUNC(UAC_VERSION_1, UAC_HEADER, validate_uac1_header), FIXED(UAC_VERSION_1, UAC_INPUT_TERMINAL, struct uac_input_terminal_descriptor), FIXED(UAC_VERSION_1, UAC_OUTPUT_TERMINAL, struct uac1_output_terminal_descriptor), FUNC(UAC_VERSION_1, UAC_MIXER_UNIT, validate_mixer_unit), FUNC(UAC_VERSION_1, UAC_SELECTOR_UNIT, validate_selector_unit), FUNC(UAC_VERSION_1, UAC_FEATURE_UNIT, validate_uac1_feature_unit), FUNC(UAC_VERSION_1, UAC1_PROCESSING_UNIT, validate_processing_unit), FUNC(UAC_VERSION_1, UAC1_EXTENSION_UNIT, validate_processing_unit), /* UAC2 */ FIXED(UAC_VERSION_2, UAC_HEADER, struct uac2_ac_header_descriptor), FIXED(UAC_VERSION_2, UAC_INPUT_TERMINAL, struct uac2_input_terminal_descriptor), FIXED(UAC_VERSION_2, UAC_OUTPUT_TERMINAL, struct uac2_output_terminal_descriptor), FUNC(UAC_VERSION_2, UAC_MIXER_UNIT, validate_mixer_unit), FUNC(UAC_VERSION_2, UAC_SELECTOR_UNIT, validate_selector_unit), FUNC(UAC_VERSION_2, UAC_FEATURE_UNIT, validate_uac2_feature_unit), /* just a stop-gap, it should be a proper function for the array * once if the unit is really parsed/used */ FIXED(UAC_VERSION_2, UAC2_EFFECT_UNIT, struct uac2_effect_unit_descriptor), FUNC(UAC_VERSION_2, UAC2_PROCESSING_UNIT_V2, validate_processing_unit), FUNC(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2, validate_processing_unit), FIXED(UAC_VERSION_2, UAC2_CLOCK_SOURCE, struct uac_clock_source_descriptor), FUNC(UAC_VERSION_2, UAC2_CLOCK_SELECTOR, validate_selector_unit), FIXED(UAC_VERSION_2, UAC2_CLOCK_MULTIPLIER, struct uac_clock_multiplier_descriptor), /* UAC_VERSION_2, UAC2_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC3 */ FIXED(UAC_VERSION_2, UAC_HEADER, struct uac3_ac_header_descriptor), FIXED(UAC_VERSION_3, UAC_INPUT_TERMINAL, struct uac3_input_terminal_descriptor), FIXED(UAC_VERSION_3, UAC_OUTPUT_TERMINAL, struct uac3_output_terminal_descriptor), /* UAC_VERSION_3, UAC3_EXTENDED_TERMINAL: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_MIXER_UNIT, validate_mixer_unit), FUNC(UAC_VERSION_3, UAC3_SELECTOR_UNIT, validate_selector_unit), FUNC(UAC_VERSION_3, UAC3_FEATURE_UNIT, validate_uac3_feature_unit), FIXED(UAC_VERSION_3, UAC3_EFFECT_UNIT, struct uac2_effect_unit_descriptor), /* sharing the same struct */ FUNC(UAC_VERSION_3, UAC3_PROCESSING_UNIT, validate_processing_unit), FUNC(UAC_VERSION_3, UAC3_EXTENSION_UNIT, validate_processing_unit), FIXED(UAC_VERSION_3, UAC3_CLOCK_SOURCE, struct uac3_clock_source_descriptor), FUNC(UAC_VERSION_3, UAC3_CLOCK_SELECTOR, validate_selector_unit), FIXED(UAC_VERSION_3, UAC3_CLOCK_MULTIPLIER, struct uac3_clock_multiplier_descriptor), /* UAC_VERSION_3, UAC3_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC_VERSION_3, UAC3_CONNECTORS: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_POWER_DOMAIN, validate_uac3_power_domain_unit), { } /* terminator */ }; static const struct usb_desc_validator midi_validators[] = { FIXED(UAC_VERSION_ALL, USB_MS_HEADER, struct usb_ms_header_descriptor), FIXED(UAC_VERSION_ALL, USB_MS_MIDI_IN_JACK, struct usb_midi_in_jack_descriptor), FUNC(UAC_VERSION_ALL, USB_MS_MIDI_OUT_JACK, validate_midi_out_jack), { } /* terminator */ }; /* Validate the given unit descriptor, return true if it's OK */ static bool validate_desc(unsigned char *hdr, int protocol, const struct usb_desc_validator *v) { if (hdr[1] != USB_DT_CS_INTERFACE) return true; /* don't care */ for (; v->type; v++) { if (v->type == hdr[2] && (v->protocol == UAC_VERSION_ALL || v->protocol == protocol)) { if (v->func) return v->func(hdr, v); /* check for the fixed size */ return hdr[0] >= v->size; } } return true; /* not matching, skip validation */ } bool snd_usb_validate_audio_desc(void *p, int protocol) { unsigned char *c = p; bool valid; valid = validate_desc(p, protocol, audio_validators); if (!valid && snd_usb_skip_validation) { print_hex_dump(KERN_ERR, "USB-audio: buggy audio desc: ", DUMP_PREFIX_NONE, 16, 1, c, c[0], true); valid = true; } return valid; } bool snd_usb_validate_midi_desc(void *p) { unsigned char *c = p; bool valid; valid = validate_desc(p, UAC_VERSION_1, midi_validators); if (!valid && snd_usb_skip_validation) { print_hex_dump(KERN_ERR, "USB-audio: buggy midi desc: ", DUMP_PREFIX_NONE, 16, 1, c, c[0], true); valid = true; } return valid; }
4 4 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* Protective Load Balancing (PLB) * * PLB was designed to reduce link load imbalance across datacenter * switches. PLB is a host-based optimization; it leverages congestion * signals from the transport layer to randomly change the path of the * connection experiencing sustained congestion. PLB prefers to repath * after idle periods to minimize packet reordering. It repaths by * changing the IPv6 Flow Label on the packets of a connection, which * datacenter switches include as part of ECMP/WCMP hashing. * * PLB is described in detail in: * * Mubashir Adnan Qureshi, Yuchung Cheng, Qianwen Yin, Qiaobin Fu, * Gautam Kumar, Masoud Moshref, Junhua Yan, Van Jacobson, * David Wetherall,Abdul Kabbani: * "PLB: Congestion Signals are Simple and Effective for * Network Load Balancing" * In ACM SIGCOMM 2022, Amsterdam Netherlands. * */ #include <net/tcp.h> /* Called once per round-trip to update PLB state for a connection. */ void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, const int cong_ratio) { struct net *net = sock_net(sk); if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; if (cong_ratio >= 0) { if (cong_ratio < READ_ONCE(net->ipv4.sysctl_tcp_plb_cong_thresh)) plb->consec_cong_rounds = 0; else if (plb->consec_cong_rounds < READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds)) plb->consec_cong_rounds++; } } EXPORT_SYMBOL_GPL(tcp_plb_update_state); /* Check whether recent congestion has been persistent enough to warrant * a load balancing decision that switches the connection to another path. */ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) { struct net *net = sock_net(sk); u32 max_suspend; bool forced_rehash = false, idle_rehash = false; if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; forced_rehash = plb->consec_cong_rounds >= READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds); /* If sender goes idle then we check whether to rehash. */ idle_rehash = READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds) && !tcp_sk(sk)->packets_out && plb->consec_cong_rounds >= READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds); if (!forced_rehash && !idle_rehash) return; /* Note that tcp_jiffies32 can wrap; we detect wraps by checking for * cases where the max suspension end is before the actual suspension * end. We clear pause_until to 0 to indicate there is no recent * RTO event that constrains PLB rehashing. */ max_suspend = 2 * READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; if (plb->pause_until && (!before(tcp_jiffies32, plb->pause_until) || before(tcp_jiffies32 + max_suspend, plb->pause_until))) plb->pause_until = 0; if (plb->pause_until) return; sk_rethink_txhash(sk); plb->consec_cong_rounds = 0; tcp_sk(sk)->plb_rehash++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); } EXPORT_SYMBOL_GPL(tcp_plb_check_rehash); /* Upon RTO, disallow load balancing for a while, to avoid having load * balancing decisions switch traffic to a black-holed path that was * previously avoided with a sk_rethink_txhash() call at RTO time. */ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb) { struct net *net = sock_net(sk); u32 pause; if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled)) return; pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ; pause += get_random_u32_below(pause); plb->pause_until = tcp_jiffies32 + pause; /* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call * that may switch this connection to a path with completely different * congestion characteristics. */ plb->consec_cong_rounds = 0; } EXPORT_SYMBOL_GPL(tcp_plb_update_state_upon_rto);
3 2 1 1 3 1 3 1 3 1 1 3 2 1 3 3 1 3 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 // SPDX-License-Identifier: GPL-2.0-only /* * Ram backed block device driver. * * Copyright (C) 2007 Nick Piggin * Copyright (C) 2007 Novell Inc. * * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright * of their respective owners. */ #include <linux/init.h> #include <linux/initrd.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/major.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/pagemap.h> #include <linux/xarray.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/debugfs.h> #include <linux/uaccess.h> /* * Each block ramdisk device has a xarray brd_pages of pages that stores * the pages containing the block device's contents. */ struct brd_device { int brd_number; struct gendisk *brd_disk; struct list_head brd_list; /* * Backing store of pages. This is the contents of the block device. */ struct xarray brd_pages; u64 brd_nr_pages; }; /* * Look up and return a brd's page with reference grabbed for a given sector. */ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { struct page *page; XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); rcu_read_lock(); repeat: page = xas_load(&xas); if (xas_retry(&xas, page)) { xas_reset(&xas); goto repeat; } if (!page) goto out; if (!get_page_unless_zero(page)) { xas_reset(&xas); goto repeat; } if (unlikely(page != xas_reload(&xas))) { put_page(page); xas_reset(&xas); goto repeat; } out: rcu_read_unlock(); return page; } /* * Insert a new page for a given sector, if one does not already exist. * The returned page will grab reference. */ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, blk_opf_t opf) { gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO; struct page *page, *ret; page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) return ERR_PTR(-ENOMEM); xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); if (!ret) { brd->brd_nr_pages++; get_page(page); xa_unlock(&brd->brd_pages); return page; } if (!xa_is_err(ret)) { get_page(ret); xa_unlock(&brd->brd_pages); put_page(page); return ret; } xa_unlock(&brd->brd_pages); put_page(page); return ERR_PTR(xa_err(ret)); } /* * Free all backing store pages and xarray. This must only be called when * there are no other users of the device. */ static void brd_free_pages(struct brd_device *brd) { struct page *page; pgoff_t idx; xa_for_each(&brd->brd_pages, idx, page) { put_page(page); cond_resched(); } xa_destroy(&brd->brd_pages); } /* * Process a single segment. The segment is capped to not cross page boundaries * in both the bio and the brd backing memory. */ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) { struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); sector_t sector = bio->bi_iter.bi_sector; u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT; blk_opf_t opf = bio->bi_opf; struct page *page; void *kaddr; bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); page = brd_lookup_page(brd, sector); if (!page && op_is_write(opf)) { page = brd_insert_page(brd, sector, opf); if (IS_ERR(page)) goto out_error; } kaddr = bvec_kmap_local(&bv); if (op_is_write(opf)) { memcpy_to_page(page, offset, kaddr, bv.bv_len); } else { if (page) memcpy_from_page(kaddr, page, offset, bv.bv_len); else memset(kaddr, 0, bv.bv_len); } kunmap_local(kaddr); bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); if (page) put_page(page); return true; out_error: if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) bio_wouldblock_error(bio); else bio_io_error(bio); return false; } static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { sector_t aligned_sector = round_up(sector, PAGE_SECTORS); sector_t aligned_end = round_down( sector + (size >> SECTOR_SHIFT), PAGE_SECTORS); struct page *page; if (aligned_end <= aligned_sector) return; xa_lock(&brd->brd_pages); while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { put_page(page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; } xa_unlock(&brd->brd_pages); } static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; if (unlikely(op_is_discard(bio->bi_opf))) { brd_do_discard(brd, bio->bi_iter.bi_sector, bio->bi_iter.bi_size); bio_endio(bio); return; } do { if (!brd_rw_bvec(brd, bio)) return; } while (bio->bi_iter.bi_size); bio_endio(bio); } static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .submit_bio = brd_submit_bio, }; /* * And now the modules code and kernel interface. */ static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; module_param(rd_nr, int, 0444); MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; module_param(rd_size, ulong, 0444); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); static int max_part = 1; module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); MODULE_DESCRIPTION("Ram backed block device driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); MODULE_ALIAS("rd"); #ifndef MODULE /* Legacy boot options - nonmodular */ static int __init ramdisk_size(char *str) { rd_size = simple_strtol(str, NULL, 0); return 1; } __setup("ramdisk_size=", ramdisk_size); #endif /* * The device scheme is derived from loop.c. Keep them in synch where possible * (should share code eventually). */ static LIST_HEAD(brd_devices); static DEFINE_MUTEX(brd_devices_mutex); static struct dentry *brd_debugfs_dir; static struct brd_device *brd_find_or_alloc_device(int i) { struct brd_device *brd; mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { if (brd->brd_number == i) { mutex_unlock(&brd_devices_mutex); return ERR_PTR(-EEXIST); } } brd = kzalloc(sizeof(*brd), GFP_KERNEL); if (!brd) { mutex_unlock(&brd_devices_mutex); return ERR_PTR(-ENOMEM); } brd->brd_number = i; list_add_tail(&brd->brd_list, &brd_devices); mutex_unlock(&brd_devices_mutex); return brd; } static void brd_free_device(struct brd_device *brd) { mutex_lock(&brd_devices_mutex); list_del(&brd->brd_list); mutex_unlock(&brd_devices_mutex); kfree(brd); } static int brd_alloc(int i) { struct brd_device *brd; struct gendisk *disk; char buf[DISK_NAME_LEN]; int err = -ENOMEM; struct queue_limits lim = { /* * This is so fdisk will align partitions on 4k, because of * direct_access API needing 4k alignment, returning a PFN * (This is only a problem on very small devices <= 4M, * otherwise fdisk will align on 1M. Regardless this call * is harmless) */ .physical_block_size = PAGE_SIZE, .max_hw_discard_sectors = UINT_MAX, .max_discard_segments = 1, .discard_granularity = PAGE_SIZE, .features = BLK_FEAT_SYNCHRONOUS | BLK_FEAT_NOWAIT, }; brd = brd_find_or_alloc_device(i); if (IS_ERR(brd)) return PTR_ERR(brd); xa_init(&brd->brd_pages); snprintf(buf, DISK_NAME_LEN, "ram%d", i); if (!IS_ERR_OR_NULL(brd_debugfs_dir)) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_free_dev; } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; disk->minors = max_part; disk->fops = &brd_fops; disk->private_data = brd; strscpy(disk->disk_name, buf, DISK_NAME_LEN); set_capacity(disk, rd_size * 2); err = add_disk(disk); if (err) goto out_cleanup_disk; return 0; out_cleanup_disk: put_disk(disk); out_free_dev: brd_free_device(brd); return err; } static void brd_probe(dev_t dev) { brd_alloc(MINOR(dev) / max_part); } static void brd_cleanup(void) { struct brd_device *brd, *next; debugfs_remove_recursive(brd_debugfs_dir); list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { del_gendisk(brd->brd_disk); put_disk(brd->brd_disk); brd_free_pages(brd); brd_free_device(brd); } } static inline void brd_check_and_reset_par(void) { if (unlikely(!max_part)) max_part = 1; /* * make sure 'max_part' can be divided exactly by (1U << MINORBITS), * otherwise, it is possiable to get same dev_t when adding partitions. */ if ((1U << MINORBITS) % max_part != 0) max_part = 1UL << fls(max_part); if (max_part > DISK_MAX_PARTS) { pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", DISK_MAX_PARTS, DISK_MAX_PARTS); max_part = DISK_MAX_PARTS; } } static int __init brd_init(void) { int err, i; /* * brd module now has a feature to instantiate underlying device * structure on-demand, provided that there is an access dev node. * * (1) if rd_nr is specified, create that many upfront. else * it defaults to CONFIG_BLK_DEV_RAM_COUNT * (2) User can further extend brd devices by create dev node themselves * and have kernel automatically instantiate actual device * on-demand. Example: * mknod /path/devnod_name b 1 X # 1 is the rd major * fdisk -l /path/devnod_name * If (X / max_part) was not already created it will be created * dynamically. */ brd_check_and_reset_par(); brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) { err = -EIO; goto out_free; } for (i = 0; i < rd_nr; i++) brd_alloc(i); pr_info("brd: module loaded\n"); return 0; out_free: brd_cleanup(); pr_info("brd: module NOT loaded !!!\n"); return err; } static void __exit brd_exit(void) { unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); brd_cleanup(); pr_info("brd: module unloaded\n"); } module_init(brd_init); module_exit(brd_exit);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 // SPDX-License-Identifier: GPL-2.0+ /* * fl512.c * Anders Gnistrup <ex18@kalman.iau.dtu.dk> * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2000 David A. Schleef <ds@schleef.org> */ /* * Driver: fl512 * Description: unknown * Author: Anders Gnistrup <ex18@kalman.iau.dtu.dk> * Devices: [unknown] FL512 (fl512) * Status: unknown * * Digital I/O is not supported. * * Configuration options: * [0] - I/O port base address */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/delay.h> /* * Register I/O map */ #define FL512_AI_LSB_REG 0x02 #define FL512_AI_MSB_REG 0x03 #define FL512_AI_MUX_REG 0x02 #define FL512_AI_START_CONV_REG 0x03 #define FL512_AO_DATA_REG(x) (0x04 + ((x) * 2)) #define FL512_AO_TRIG_REG(x) (0x04 + ((x) * 2)) static const struct comedi_lrange range_fl512 = { 4, { BIP_RANGE(0.5), BIP_RANGE(1), BIP_RANGE(5), BIP_RANGE(10), UNI_RANGE(1), UNI_RANGE(5), UNI_RANGE(10) } }; static int fl512_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; outb(chan, dev->iobase + FL512_AI_MUX_REG); for (i = 0; i < insn->n; i++) { outb(0, dev->iobase + FL512_AI_START_CONV_REG); /* XXX should test "done" flag instead of delay */ usleep_range(30, 100); val = inb(dev->iobase + FL512_AI_LSB_REG); val |= (inb(dev->iobase + FL512_AI_MSB_REG) << 8); val &= s->maxdata; data[i] = val; } return insn->n; } static int fl512_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val = s->readback[chan]; int i; for (i = 0; i < insn->n; i++) { val = data[i]; /* write LSB, MSB then trigger conversion */ outb(val & 0x0ff, dev->iobase + FL512_AO_DATA_REG(chan)); outb((val >> 8) & 0xf, dev->iobase + FL512_AO_DATA_REG(chan)); inb(dev->iobase + FL512_AO_TRIG_REG(chan)); } s->readback[chan] = val; return insn->n; } static int fl512_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 0x10); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND; s->n_chan = 16; s->maxdata = 0x0fff; s->range_table = &range_fl512; s->insn_read = fl512_ai_insn_read; /* Analog Output subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE; s->n_chan = 2; s->maxdata = 0x0fff; s->range_table = &range_fl512; s->insn_write = fl512_ao_insn_write; return comedi_alloc_subdev_readback(s); } static struct comedi_driver fl512_driver = { .driver_name = "fl512", .module = THIS_MODULE, .attach = fl512_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(fl512_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi low-level driver"); MODULE_LICENSE("GPL");
1 4 112 113 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IPv6 * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ /* * Fixes: * * Ralf Baechle : generic ipv6 checksum * <ralf@waldorf-gmbh.de> */ #ifndef _CHECKSUM_IPV6_H #define _CHECKSUM_IPV6_H #include <asm/types.h> #include <asm/byteorder.h> #include <net/ip.h> #include <asm/checksum.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/ipv6.h> #ifndef _HAVE_ARCH_IPV6_CSUM __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum csum); #endif static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) { return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, proto, 0)); } static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline void __tcp_v6_send_check(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct tcphdr *th = tcp_hdr(skb); th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); ipv6h->payload_len = 0; th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0); } static inline __sum16 udp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif
13 10 10 10 1 13 1 1 6 11 11 14 12 6 12 2 2 1 1 1 1 5 4 2 2 2 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 // SPDX-License-Identifier: GPL-2.0-or-later /* AFS dynamic root handling * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/fs.h> #include <linux/namei.h> #include <linux/dns_resolver.h> #include "internal.h" #define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */ #define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX) static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino); /* * iget5() comparator for inode created by autocell operations */ static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) { struct afs_fid *fid = opaque; return inode->i_ino == fid->vnode; } /* * iget5() inode initialiser */ static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) { struct afs_super_info *as = AFS_FS_S(inode->i_sb); struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_fid *fid = opaque; vnode->volume = as->volume; vnode->fid = *fid; inode->i_ino = fid->vnode; inode->i_generation = fid->unique; return 0; } /* * Create an inode for an autocell dynamic automount dir. */ static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino) { struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vnode = ino, .unique = 1, }; _enter(""); inode = iget5_locked(sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", inode, inode->i_ino, fid.vid, fid.vnode, fid.unique); vnode = AFS_FS_I(inode); if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 2); inode->i_size = 0; inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_autocell_inode_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_AUTOMOUNT | S_NOATIME; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); unlock_new_inode(inode); } _leave(" = %p", inode); return inode; } /* * Try to automount the mountpoint with pseudo directory, if the autocell * option is set. */ static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct afs_cell *cell = NULL; struct afs_net *net = afs_d2net(dentry); struct inode *inode = NULL; const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; bool dotted = false; int ret = -ENOENT; /* Names prefixed with a dot are R/W mounts. */ if (name[0] == '.') { name++; len--; dotted = true; } cell = afs_lookup_cell(net, name, len, NULL, false, afs_cell_trace_use_lookup_dynroot); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto out_no_cell; } inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; } dentry->d_fsdata = cell; return d_splice_alias(inode, dentry); out: afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot); out_no_cell: if (!inode) return d_splice_alias(inode, dentry); return ret == -ENOENT ? NULL : ERR_PTR(ret); } /* * Look up an entry in a dynroot directory. */ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { _enter("%pd", dentry); if (flags & LOOKUP_CREATE) return ERR_PTR(-EOPNOTSUPP); if (dentry->d_name.len >= AFSNAMEMAX) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } if (dentry->d_name.len == 5 && memcmp(dentry->d_name.name, "@cell", 5) == 0) return afs_lookup_atcell(dir, dentry, 2); if (dentry->d_name.len == 6 && memcmp(dentry->d_name.name, ".@cell", 6) == 0) return afs_lookup_atcell(dir, dentry, 3); return afs_dynroot_lookup_cell(dir, dentry, flags); } const struct inode_operations afs_dynroot_inode_operations = { .lookup = afs_dynroot_lookup, }; static void afs_dynroot_d_release(struct dentry *dentry) { struct afs_cell *cell = dentry->d_fsdata; afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt); } /* * Keep @cell symlink dentries around, but only keep cell autodirs when they're * being used. */ static int afs_dynroot_delete_dentry(const struct dentry *dentry) { const struct qstr *name = &dentry->d_name; if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0) return 0; if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0) return 0; return 1; } const struct dentry_operations afs_dynroot_dentry_operations = { .d_delete = afs_dynroot_delete_dentry, .d_release = afs_dynroot_d_release, .d_automount = afs_d_automount, }; static void afs_atcell_delayed_put_cell(void *arg) { struct afs_cell *cell = arg; afs_put_cell(cell, afs_cell_trace_put_atcell); } /* * Read @cell or .@cell symlinks. */ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_cell *cell; struct afs_net *net = afs_i2net(inode); const char *name; bool dotted = vnode->fid.vnode == 3; if (!rcu_access_pointer(net->ws_cell)) return ERR_PTR(-ENOENT); if (!dentry) { /* We're in RCU-pathwalk. */ cell = rcu_dereference(net->ws_cell); if (dotted) name = cell->name - 1; else name = cell->name; /* Shouldn't need to set a delayed call. */ return name; } down_read(&net->cells_lock); cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock)); if (dotted) name = cell->name - 1; else name = cell->name; afs_get_cell(cell, afs_cell_trace_get_atcell); set_delayed_call(done, afs_atcell_delayed_put_cell, cell); up_read(&net->cells_lock); return name; } static const struct inode_operations afs_atcell_inode_operations = { .get_link = afs_atcell_get_link, }; /* * Create an inode for the @cell or .@cell symlinks. */ static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino) { struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vnode = ino, .unique = 1, }; inode = iget5_locked(dir->i_sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) return ERR_PTR(-ENOMEM); vnode = AFS_FS_I(inode); if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 1); inode->i_size = 0; inode->i_mode = S_IFLNK | 0555; inode->i_op = &afs_atcell_inode_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_NOATIME; unlock_new_inode(inode); } return d_splice_alias(inode, dentry); } /* * Transcribe the cell database into readdir content under the RCU read lock. * Each cell produces two entries, one prefixed with a dot and one not. */ static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx) { const struct afs_cell *cell; loff_t newpos; _enter("%llu", ctx->pos); for (;;) { unsigned int ix = ctx->pos >> 1; cell = idr_get_next(&net->cells_dyn_ino, &ix); if (!cell) return 0; if (READ_ONCE(cell->state) == AFS_CELL_REMOVING || READ_ONCE(cell->state) == AFS_CELL_DEAD) { ctx->pos += 2; ctx->pos &= ~1; continue; } newpos = ix << 1; if (newpos > ctx->pos) ctx->pos = newpos; _debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino); if ((ctx->pos & 1) == 0) { if (!dir_emit(ctx, cell->name, cell->name_len, cell->dynroot_ino, DT_DIR)) return 0; ctx->pos++; } if ((ctx->pos & 1) == 1) { if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1, cell->dynroot_ino + 1, DT_DIR)) return 0; ctx->pos++; } } return 0; } /* * Read the AFS dynamic root directory. This produces a list of cellnames, * dotted and undotted, along with @cell and .@cell links if configured. */ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) { struct afs_net *net = afs_d2net(file->f_path.dentry); int ret = 0; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) { if (rcu_access_pointer(net->ws_cell) && !dir_emit(ctx, "@cell", 5, 2, DT_LNK)) return 0; ctx->pos = 3; } if (ctx->pos == 3) { if (rcu_access_pointer(net->ws_cell) && !dir_emit(ctx, ".@cell", 6, 3, DT_LNK)) return 0; ctx->pos = 4; } if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { down_read(&net->cells_lock); ret = afs_dynroot_readdir_cells(net, ctx); up_read(&net->cells_lock); } return ret; } static const struct file_operations afs_dynroot_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = afs_dynroot_readdir, .fsync = noop_fsync, }; /* * Create an inode for a dynamic root directory. */ struct inode *afs_dynroot_iget_root(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); struct afs_vnode *vnode; struct inode *inode; struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,}; if (as->volume) fid.vid = as->volume->vid; inode = iget5_locked(sb, fid.vnode, afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); if (!inode) return ERR_PTR(-ENOMEM); vnode = AFS_FS_I(inode); /* there shouldn't be an existing inode */ if (inode->i_state & I_NEW) { netfs_inode_init(&vnode->netfs, NULL, false); simple_inode_init_ts(inode); set_nlink(inode, 2); inode->i_size = 0; inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_dynroot_inode_operations; inode->i_fop = &afs_dynroot_file_operations; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_generation = 0; inode->i_flags |= S_NOATIME; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); unlock_new_inode(inode); } _leave(" = %p", inode); return inode; }
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/netfilter.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_timestamp.h> static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); void nf_conntrack_tstamp_pernet_init(struct net *net) { net->ct.sysctl_tstamp = nf_ct_tstamp; }
4 4 5 3 4 4 1 1 1 1 11 7 7 1 1 1 12 12 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth virtual HCI driver * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> * Copyright (C) 2004-2006 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/module.h> #include <linux/unaligned.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/miscdevice.h> #include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #define VERSION "1.5" static bool amp; struct vhci_data { struct hci_dev *hdev; wait_queue_head_t read_wait; struct sk_buff_head readq; struct mutex open_mutex; struct delayed_work open_timeout; struct work_struct suspend_work; bool suspended; bool wakeup; __u16 msft_opcode; bool aosp_capable; atomic_t initialized; }; static int vhci_open_dev(struct hci_dev *hdev) { return 0; } static int vhci_close_dev(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); skb_queue_purge(&data->readq); return 0; } static int vhci_flush(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); skb_queue_purge(&data->readq); return 0; } static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct vhci_data *data = hci_get_drvdata(hdev); memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); skb_queue_tail(&data->readq, skb); if (atomic_read(&data->initialized)) wake_up_interruptible(&data->read_wait); return 0; } static int vhci_get_data_path_id(struct hci_dev *hdev, u8 *data_path_id) { *data_path_id = 0; return 0; } static int vhci_get_codec_config_data(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data) { if (type != ESCO_LINK) return -EINVAL; *vnd_len = 0; *vnd_data = NULL; return 0; } static bool vhci_wakeup(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); return data->wakeup; } static ssize_t force_suspend_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; char buf[3]; buf[0] = data->suspended ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static void vhci_suspend_work(struct work_struct *work) { struct vhci_data *data = container_of(work, struct vhci_data, suspend_work); if (data->suspended) hci_suspend_dev(data->hdev); else hci_resume_dev(data->hdev); } static ssize_t force_suspend_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (data->suspended == enable) return -EALREADY; data->suspended = enable; schedule_work(&data->suspend_work); return count; } static const struct file_operations force_suspend_fops = { .open = simple_open, .read = force_suspend_read, .write = force_suspend_write, .llseek = default_llseek, }; static ssize_t force_wakeup_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; char buf[3]; buf[0] = data->wakeup ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static ssize_t force_wakeup_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (data->wakeup == enable) return -EALREADY; data->wakeup = enable; return count; } static const struct file_operations force_wakeup_fops = { .open = simple_open, .read = force_wakeup_read, .write = force_wakeup_write, .llseek = default_llseek, }; static int msft_opcode_set(void *data, u64 val) { struct vhci_data *vhci = data; if (val > 0xffff || hci_opcode_ogf(val) != 0x3f) return -EINVAL; if (vhci->msft_opcode) return -EALREADY; vhci->msft_opcode = val; return 0; } static int msft_opcode_get(void *data, u64 *val) { struct vhci_data *vhci = data; *val = vhci->msft_opcode; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(msft_opcode_fops, msft_opcode_get, msft_opcode_set, "%llu\n"); static ssize_t aosp_capable_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *vhci = file->private_data; char buf[3]; buf[0] = vhci->aosp_capable ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static ssize_t aosp_capable_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *vhci = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (!enable) return -EINVAL; if (vhci->aosp_capable) return -EALREADY; vhci->aosp_capable = enable; return count; } static const struct file_operations aosp_capable_fops = { .open = simple_open, .read = aosp_capable_read, .write = aosp_capable_write, .llseek = default_llseek, }; static int vhci_setup(struct hci_dev *hdev) { struct vhci_data *vhci = hci_get_drvdata(hdev); if (vhci->msft_opcode) hci_set_msft_opcode(hdev, vhci->msft_opcode); if (vhci->aosp_capable) hci_set_aosp_capable(hdev); return 0; } static void vhci_coredump(struct hci_dev *hdev) { /* No need to do anything */ } static void vhci_coredump_hdr(struct hci_dev *hdev, struct sk_buff *skb) { const char *buf; buf = "Controller Name: vhci_ctrl\n"; skb_put_data(skb, buf, strlen(buf)); buf = "Firmware Version: vhci_fw\n"; skb_put_data(skb, buf, strlen(buf)); buf = "Driver: vhci_drv\n"; skb_put_data(skb, buf, strlen(buf)); buf = "Vendor: vhci\n"; skb_put_data(skb, buf, strlen(buf)); } #define MAX_COREDUMP_LINE_LEN 40 struct devcoredump_test_data { enum devcoredump_state state; unsigned int timeout; char data[MAX_COREDUMP_LINE_LEN]; }; static inline void force_devcd_timeout(struct hci_dev *hdev, unsigned int timeout) { #ifdef CONFIG_DEV_COREDUMP hdev->dump.timeout = secs_to_jiffies(timeout); #endif } static ssize_t force_devcd_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; struct hci_dev *hdev = data->hdev; struct sk_buff *skb = NULL; struct devcoredump_test_data dump_data; size_t data_size; int ret; if (count < offsetof(struct devcoredump_test_data, data) || count > sizeof(dump_data)) return -EINVAL; if (copy_from_user(&dump_data, user_buf, count)) return -EFAULT; data_size = count - offsetof(struct devcoredump_test_data, data); skb = alloc_skb(data_size, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_put_data(skb, &dump_data.data, data_size); hci_devcd_register(hdev, vhci_coredump, vhci_coredump_hdr, NULL); /* Force the devcoredump timeout */ if (dump_data.timeout) force_devcd_timeout(hdev, dump_data.timeout); ret = hci_devcd_init(hdev, skb->len); if (ret) { BT_ERR("Failed to generate devcoredump"); kfree_skb(skb); return ret; } hci_devcd_append(hdev, skb); switch (dump_data.state) { case HCI_DEVCOREDUMP_DONE: hci_devcd_complete(hdev); break; case HCI_DEVCOREDUMP_ABORT: hci_devcd_abort(hdev); break; case HCI_DEVCOREDUMP_TIMEOUT: /* Do nothing */ break; default: return -EINVAL; } return count; } static const struct file_operations force_devcoredump_fops = { .open = simple_open, .write = force_devcd_write, }; static void vhci_debugfs_init(struct vhci_data *data) { struct hci_dev *hdev = data->hdev; debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, &force_suspend_fops); debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, &force_wakeup_fops); if (IS_ENABLED(CONFIG_BT_MSFTEXT)) debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, &msft_opcode_fops); if (IS_ENABLED(CONFIG_BT_AOSPEXT)) debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, &aosp_capable_fops); debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, &force_devcoredump_fops); } static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; struct sk_buff *skb; if (data->hdev) return -EBADFD; /* bits 2-5 are reserved (must be zero) */ if (opcode & 0x3c) return -EINVAL; skb = bt_skb_alloc(4, GFP_KERNEL); if (!skb) return -ENOMEM; hdev = hci_alloc_dev(); if (!hdev) { kfree_skb(skb); return -ENOMEM; } data->hdev = hdev; hdev->bus = HCI_VIRTUAL; hci_set_drvdata(hdev, data); hdev->open = vhci_open_dev; hdev->close = vhci_close_dev; hdev->flush = vhci_flush; hdev->send = vhci_send_frame; hdev->get_data_path_id = vhci_get_data_path_id; hdev->get_codec_config_data = vhci_get_codec_config_data; hdev->wakeup = vhci_wakeup; hdev->setup = vhci_setup; hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); hci_set_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED); /* bit 6 is for external configuration */ if (opcode & 0x40) hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); /* bit 7 is for raw device */ if (opcode & 0x80) hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); data->hdev = NULL; kfree_skb(skb); return -EBUSY; } if (!IS_ERR_OR_NULL(hdev->debugfs)) vhci_debugfs_init(data); hci_skb_pkt_type(skb) = HCI_VENDOR_PKT; skb_put_u8(skb, 0xff); skb_put_u8(skb, opcode); put_unaligned_le16(hdev->id, skb_put(skb, 2)); skb_queue_head(&data->readq, skb); atomic_inc(&data->initialized); wake_up_interruptible(&data->read_wait); return 0; } static int vhci_create_device(struct vhci_data *data, __u8 opcode) { int err; mutex_lock(&data->open_mutex); err = __vhci_create_device(data, opcode); mutex_unlock(&data->open_mutex); return err; } static inline ssize_t vhci_get_user(struct vhci_data *data, struct iov_iter *from) { size_t len = iov_iter_count(from); struct sk_buff *skb; __u8 pkt_type, opcode; int ret; if (len < 2 || len > HCI_MAX_FRAME_SIZE) return -EINVAL; skb = bt_skb_alloc(len, GFP_KERNEL); if (!skb) return -ENOMEM; if (!copy_from_iter_full(skb_put(skb, len), len, from)) { kfree_skb(skb); return -EFAULT; } pkt_type = *((__u8 *) skb->data); skb_pull(skb, 1); switch (pkt_type) { case HCI_EVENT_PKT: case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: case HCI_ISODATA_PKT: if (!data->hdev) { kfree_skb(skb); return -ENODEV; } hci_skb_pkt_type(skb) = pkt_type; ret = hci_recv_frame(data->hdev, skb); break; case HCI_VENDOR_PKT: cancel_delayed_work_sync(&data->open_timeout); opcode = *((__u8 *) skb->data); skb_pull(skb, 1); if (skb->len > 0) { kfree_skb(skb); return -EINVAL; } kfree_skb(skb); ret = vhci_create_device(data, opcode); break; default: kfree_skb(skb); return -EINVAL; } return (ret < 0) ? ret : len; } static inline ssize_t vhci_put_user(struct vhci_data *data, struct sk_buff *skb, char __user *buf, int count) { char __user *ptr = buf; int len; len = min_t(unsigned int, skb->len, count); if (copy_to_user(ptr, skb->data, len)) return -EFAULT; if (!data->hdev) return len; data->hdev->stat.byte_tx += len; switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: data->hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: data->hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: data->hdev->stat.sco_tx++; break; } return len; } static ssize_t vhci_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct vhci_data *data = file->private_data; struct sk_buff *skb; ssize_t ret = 0; while (count) { skb = skb_dequeue(&data->readq); if (skb) { ret = vhci_put_user(data, skb, buf, count); if (ret < 0) skb_queue_head(&data->readq, skb); else kfree_skb(skb); break; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } ret = wait_event_interruptible(data->read_wait, !skb_queue_empty(&data->readq)); if (ret < 0) break; } return ret; } static ssize_t vhci_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhci_data *data = file->private_data; return vhci_get_user(data, from); } static __poll_t vhci_poll(struct file *file, poll_table *wait) { struct vhci_data *data = file->private_data; poll_wait(file, &data->read_wait, wait); if (!skb_queue_empty(&data->readq)) return EPOLLIN | EPOLLRDNORM; return EPOLLOUT | EPOLLWRNORM; } static void vhci_open_timeout(struct work_struct *work) { struct vhci_data *data = container_of(work, struct vhci_data, open_timeout.work); vhci_create_device(data, 0x00); } static int vhci_open(struct inode *inode, struct file *file) { struct vhci_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; skb_queue_head_init(&data->readq); init_waitqueue_head(&data->read_wait); mutex_init(&data->open_mutex); INIT_DELAYED_WORK(&data->open_timeout, vhci_open_timeout); INIT_WORK(&data->suspend_work, vhci_suspend_work); file->private_data = data; nonseekable_open(inode, file); schedule_delayed_work(&data->open_timeout, secs_to_jiffies(1)); return 0; } static void vhci_debugfs_remove(struct hci_dev *hdev) { debugfs_lookup_and_remove("force_suspend", hdev->debugfs); debugfs_lookup_and_remove("force_wakeup", hdev->debugfs); if (IS_ENABLED(CONFIG_BT_MSFTEXT)) debugfs_lookup_and_remove("msft_opcode", hdev->debugfs); if (IS_ENABLED(CONFIG_BT_AOSPEXT)) debugfs_lookup_and_remove("aosp_capable", hdev->debugfs); debugfs_lookup_and_remove("force_devcoredump", hdev->debugfs); } static int vhci_release(struct inode *inode, struct file *file) { struct vhci_data *data = file->private_data; struct hci_dev *hdev; cancel_delayed_work_sync(&data->open_timeout); flush_work(&data->suspend_work); hdev = data->hdev; if (hdev) { if (!IS_ERR_OR_NULL(hdev->debugfs)) vhci_debugfs_remove(hdev); hci_unregister_dev(hdev); hci_free_dev(hdev); } skb_queue_purge(&data->readq); file->private_data = NULL; kfree(data); return 0; } static const struct file_operations vhci_fops = { .owner = THIS_MODULE, .read = vhci_read, .write_iter = vhci_write, .poll = vhci_poll, .open = vhci_open, .release = vhci_release, }; static struct miscdevice vhci_miscdev = { .name = "vhci", .fops = &vhci_fops, .minor = VHCI_MINOR, }; module_misc_device(vhci_miscdev); module_param(amp, bool, 0644); MODULE_PARM_DESC(amp, "Create AMP controller device"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("devname:vhci"); MODULE_ALIAS_MISCDEV(VHCI_MINOR);
7 8 5375 5379 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 // SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * * NOHZ implementation for low and high resolution timers * * Started by: Thomas Gleixner and Ingo Molnar */ #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/percpu.h> #include <linux/nmi.h> #include <linux/profile.h> #include <linux/sched/signal.h> #include <linux/sched/clock.h> #include <linux/sched/stat.h> #include <linux/sched/nohz.h> #include <linux/sched/loadavg.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/context_tracking.h> #include <linux/mm.h> #include <asm/irq_regs.h> #include "tick-internal.h" #include <trace/events/timer.h> /* * Per-CPU nohz control structure */ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); } /* * The time when the last jiffy update happened. Write access must hold * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a * consistent view of jiffies and last_jiffies_update. */ static ktime_t last_jiffies_update; /* * Must be called with interrupts disabled ! */ static void tick_do_update_jiffies64(ktime_t now) { unsigned long ticks = 1; ktime_t delta, nextp; /* * 64-bit can do a quick check without holding the jiffies lock and * without looking at the sequence count. The smp_load_acquire() * pairs with the update done later in this function. * * 32-bit cannot do that because the store of 'tick_next_period' * consists of two 32-bit stores, and the first store could be * moved by the CPU to a random point in the future. */ if (IS_ENABLED(CONFIG_64BIT)) { if (ktime_before(now, smp_load_acquire(&tick_next_period))) return; } else { unsigned int seq; /* * Avoid contention on 'jiffies_lock' and protect the quick * check with the sequence count. */ do { seq = read_seqcount_begin(&jiffies_seq); nextp = tick_next_period; } while (read_seqcount_retry(&jiffies_seq, seq)); if (ktime_before(now, nextp)) return; } /* Quick check failed, i.e. update is required. */ raw_spin_lock(&jiffies_lock); /* * Re-evaluate with the lock held. Another CPU might have done the * update already. */ if (ktime_before(now, tick_next_period)) { raw_spin_unlock(&jiffies_lock); return; } write_seqcount_begin(&jiffies_seq); delta = ktime_sub(now, tick_next_period); if (unlikely(delta >= TICK_NSEC)) { /* Slow path for long idle sleep times */ s64 incr = TICK_NSEC; ticks += ktime_divns(delta, incr); last_jiffies_update = ktime_add_ns(last_jiffies_update, incr * ticks); } else { last_jiffies_update = ktime_add_ns(last_jiffies_update, TICK_NSEC); } /* Advance jiffies to complete the 'jiffies_seq' protected job */ jiffies_64 += ticks; /* Keep the tick_next_period variable up to date */ nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC); if (IS_ENABLED(CONFIG_64BIT)) { /* * Pairs with smp_load_acquire() in the lockless quick * check above, and ensures that the update to 'jiffies_64' is * not reordered vs. the store to 'tick_next_period', neither * by the compiler nor by the CPU. */ smp_store_release(&tick_next_period, nextp); } else { /* * A plain store is good enough on 32-bit, as the quick check * above is protected by the sequence count. */ tick_next_period = nextp; } /* * Release the sequence count. calc_global_load() below is not * protected by it, but 'jiffies_lock' needs to be held to prevent * concurrent invocations. */ write_seqcount_end(&jiffies_seq); calc_global_load(); raw_spin_unlock(&jiffies_lock); update_wall_time(); } /* * Initialize and return retrieve the jiffies update. */ static ktime_t tick_init_jiffy_update(void) { ktime_t period; raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); /* Have we started the jiffies update yet ? */ if (last_jiffies_update == 0) { u32 rem; /* * Ensure that the tick is aligned to a multiple of * TICK_NSEC. */ div_u64_rem(tick_next_period, TICK_NSEC, &rem); if (rem) tick_next_period += TICK_NSEC - rem; last_jiffies_update = tick_next_period; } period = last_jiffies_update; write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); return period; } static inline int tick_sched_flag_test(struct tick_sched *ts, unsigned long flag) { return !!(ts->flags & flag); } static inline void tick_sched_flag_set(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags |= flag; } static inline void tick_sched_flag_clear(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags &= ~flag; } #define MAX_STALLED_JIFFIES 5 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int tick_cpu, cpu = smp_processor_id(); /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the CPU in charge went * into a long sleep. If two CPUs happen to assign themselves to * this duty, then the jiffies update is still serialized by * 'jiffies_lock'. * * If nohz_full is enabled, this should not happen because the * 'tick_do_timer_cpu' CPU never relinquishes. */ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif WRITE_ONCE(tick_do_timer_cpu, cpu); tick_cpu = cpu; } /* Check if jiffies need an update */ if (tick_cpu == cpu) tick_do_update_jiffies64(now); /* * If the jiffies update stalled for too long (timekeeper in stop_machine() * or VMEXIT'ed for several msecs), force an update. */ if (ts->last_tick_jiffies != jiffies) { ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } else { if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { tick_do_update_jiffies64(now); ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } } if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) ts->got_idle_tick = 1; } static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long * time. This happens on completely idle SMP systems while * waiting on the login prompt. We also increment the "start of * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too many ticks. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; /* * In case the current tick fired too early past its expected * expiration, make sure we don't bypass the next clock reprogramming * to the same deadline. */ ts->next_tick = 0; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } /* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled. */ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); tick_sched_do_timer(ts, now); /* * Do not call when we are not in IRQ context and have * no valid 'regs' pointer */ if (regs) tick_sched_handle(ts, regs); else ts->next_tick = 0; /* * In dynticks mode, tick reprogram is deferred: * - to the idle task if in dynticks-idle * - to IRQ exit if in full-dynticks. */ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED))) return HRTIMER_NORESTART; hrtimer_forward(timer, now, TICK_NSEC); return HRTIMER_RESTART; } #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; EXPORT_SYMBOL_GPL(tick_nohz_full_mask); bool tick_nohz_full_running; EXPORT_SYMBOL_GPL(tick_nohz_full_running); static atomic_t tick_dep_mask; static bool check_tick_dependency(atomic_t *dep) { int val = atomic_read(dep); if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return true; } if (val & TICK_DEP_MASK_PERF_EVENTS) { trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); return true; } if (val & TICK_DEP_MASK_SCHED) { trace_tick_stop(0, TICK_DEP_MASK_SCHED); return true; } if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); return true; } if (val & TICK_DEP_MASK_RCU) { trace_tick_stop(0, TICK_DEP_MASK_RCU); return true; } if (val & TICK_DEP_MASK_RCU_EXP) { trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP); return true; } return false; } static bool can_stop_full_tick(int cpu, struct tick_sched *ts) { lockdep_assert_irqs_disabled(); if (unlikely(!cpu_online(cpu))) return false; if (check_tick_dependency(&tick_dep_mask)) return false; if (check_tick_dependency(&ts->tick_dep_mask)) return false; if (check_tick_dependency(&current->tick_dep_mask)) return false; if (check_tick_dependency(&current->signal->tick_dep_mask)) return false; return true; } static void nohz_full_kick_func(struct irq_work *work) { /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = IRQ_WORK_INIT_HARD(nohz_full_kick_func); /* * Kick this CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), * is NMI safe. */ static void tick_nohz_full_kick(void) { if (!tick_nohz_full_cpu(smp_processor_id())) return; irq_work_queue(this_cpu_ptr(&nohz_full_kick_work)); } /* * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. */ void tick_nohz_full_kick_cpu(int cpu) { if (!tick_nohz_full_cpu(cpu)) return; irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } static void tick_nohz_kick_task(struct task_struct *tsk) { int cpu; /* * If the task is not running, run_posix_cpu_timers() * has nothing to elapse, and an IPI can then be optimized out. * * activate_task() STORE p->tick_dep_mask * STORE p->on_rq * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or()) * LOCK rq->lock LOAD p->on_rq * smp_mb__after_spin_lock() * tick_nohz_task_switch() * LOAD p->tick_dep_mask * * XXX given a task picks up the dependency on schedule(), should we * only care about tasks that are currently on the CPU instead of all * that are on the runqueue? * * That is, does this want to be: task_on_cpu() / task_curr()? */ if (!sched_task_on_rq(tsk)) return; /* * If the task concurrently migrates to another CPU, * we guarantee it sees the new tick dependency upon * schedule. * * set_task_cpu(p, cpu); * STORE p->cpu = @cpu * __schedule() (switch to task 'p') * LOCK rq->lock * smp_mb__after_spin_lock() STORE p->tick_dep_mask * tick_nohz_task_switch() smp_mb() (atomic_fetch_or()) * LOAD p->tick_dep_mask LOAD p->cpu */ cpu = task_cpu(tsk); preempt_disable(); if (cpu_online(cpu)) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } /* * Kick all full dynticks CPUs in order to force these to re-evaluate * their dependency on the tick and restart it if necessary. */ static void tick_nohz_full_kick_all(void) { int cpu; if (!tick_nohz_full_running) return; preempt_disable(); for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } static void tick_nohz_dep_set_all(atomic_t *dep, enum tick_dep_bits bit) { int prev; prev = atomic_fetch_or(BIT(bit), dep); if (!prev) tick_nohz_full_kick_all(); } /* * Set a global tick dependency. Used by perf events that rely on freq and * unstable clocks. */ void tick_nohz_dep_set(enum tick_dep_bits bit) { tick_nohz_dep_set_all(&tick_dep_mask, bit); } void tick_nohz_dep_clear(enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tick_dep_mask); } /* * Set per-CPU tick dependency. Used by scheduler and perf events in order to * manage event-throttling. */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { int prev; struct tick_sched *ts; ts = per_cpu_ptr(&tick_cpu_sched, cpu); prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask); if (!prev) { preempt_disable(); /* Perf needs local kick that is NMI safe */ if (cpu == smp_processor_id()) { tick_nohz_full_kick(); } else { /* Remote IRQ work not NMI-safe */ if (!WARN_ON_ONCE(in_nmi())) tick_nohz_full_kick_cpu(cpu); } preempt_enable(); } } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu); void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); atomic_andnot(BIT(bit), &ts->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); /* * Set a per-task tick dependency. RCU needs this. Also posix CPU timers * in order to elapse per task timers. */ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) tick_nohz_kick_task(tsk); } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tsk->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task); /* * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse * per process timers. */ void tick_nohz_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { int prev; struct signal_struct *sig = tsk->signal; prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask); if (!prev) { struct task_struct *t; lockdep_assert_held(&tsk->sighand->siglock); __for_each_thread(sig, t) tick_nohz_kick_task(t); } } void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &sig->tick_dep_mask); } /* * Re-evaluate the need for the tick as we switch the current task. * It might need the tick due to per task/process properties: * perf events, posix CPU timers, ... */ void __tick_nohz_task_switch(void) { struct tick_sched *ts; if (!tick_nohz_full_cpu(smp_processor_id())) return; ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { if (atomic_read(&current->tick_dep_mask) || atomic_read(&current->signal->tick_dep_mask)) tick_nohz_full_kick(); } } /* Get the boot-time nohz CPU list from the kernel parameters. */ void __init tick_nohz_full_setup(cpumask_var_t cpumask) { alloc_bootmem_cpumask_var(&tick_nohz_full_mask); cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) return false; return true; } static int tick_nohz_cpu_down(unsigned int cpu) { return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY; } void __init tick_nohz_init(void) { int cpu, ret; if (!tick_nohz_full_running) return; /* * Full dynticks uses IRQ work to drive the tick rescheduling on safe * locking contexts. But then we need IRQ work to raise its own * interrupts to avoid circular dependency on the tick. */ if (!arch_irq_work_has_interrupt()) { pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); tick_nohz_full_running = false; return; } if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { pr_warn("NO_HZ: Clearing %d from nohz_full range " "for timekeeping\n", cpu); cpumask_clear_cpu(cpu, tick_nohz_full_mask); } } for_each_cpu(cpu, tick_nohz_full_mask) ct_cpu_track_user(cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "kernel/nohz:predown", NULL, tick_nohz_cpu_down); WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); } #endif /* #ifdef CONFIG_NO_HZ_FULL */ /* * NOHZ - aka dynamic tick functionality */ #ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ bool tick_nohz_enabled __read_mostly = true; unsigned long tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode */ static int __init setup_tick_nohz(char *str) { return (kstrtobool(str, &tick_nohz_enabled) == 0); } __setup("nohz=", setup_tick_nohz); bool tick_nohz_tick_stopped(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } bool tick_nohz_tick_stopped_cpu(int cpu) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * * In case the sched_tick was stopped on this CPU, we have to check if jiffies * must be updated. Otherwise an interrupt handler could use a stale jiffy * value. We do this unconditionally on any CPU, as we don't know whether the * CPU, which has the update task assigned, is in a long sleep. */ static void tick_nohz_update_jiffies(ktime_t now) { unsigned long flags; __this_cpu_write(tick_cpu_sched.idle_waketime, now); local_irq_save(flags); tick_do_update_jiffies64(now); local_irq_restore(flags); touch_softlockup_watchdog_sched(); } static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { ktime_t delta; if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))) return; delta = ktime_sub(now, ts->idle_entrytime); write_seqcount_begin(&ts->idle_sleeptime_seq); if (nr_iowait_cpu(smp_processor_id()) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); else ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_wakeup_event(); } static void tick_nohz_start_idle(struct tick_sched *ts) { write_seqcount_begin(&ts->idle_sleeptime_seq); ts->idle_entrytime = ktime_get(); tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_sleep_event(); } static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, bool compute_delta, u64 *last_update_time) { ktime_t now, idle; unsigned int seq; if (!tick_nohz_active) return -1; now = ktime_get(); if (last_update_time) *last_update_time = ktime_to_us(now); do { seq = read_seqcount_begin(&ts->idle_sleeptime_seq); if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) { ktime_t delta = ktime_sub(now, ts->idle_entrytime); idle = ktime_add(*sleeptime, delta); } else { idle = *sleeptime; } } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq)); return ktime_to_us(idle); } /** * get_cpu_idle_time_us - get the total idle time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative idle time (since boot) for a given * CPU, in microseconds. Note that this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->idle_sleeptime, !nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * get_cpu_iowait_time_us - get the total iowait time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative iowait time (since boot) for a given * CPU, in microseconds. Note this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->iowait_sleeptime, nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); hrtimer_set_expires(&ts->sched_timer, ts->last_tick); /* Forward the time to expire in the future */ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); } else { tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } /* * Reset to make sure the next tick stop doesn't get fooled by past * cached clock deadline. */ ts->next_tick = 0; } static inline bool local_timer_softirq_pending(void) { return local_timers_pending() & BIT(TIMER_SOFTIRQ); } /* * Read jiffies and the time when jiffies were updated last */ u64 get_jiffies_update(unsigned long *basej) { unsigned long basejiff; unsigned int seq; u64 basemono; do { seq = read_seqcount_begin(&jiffies_seq); basemono = last_jiffies_update; basejiff = jiffies; } while (read_seqcount_retry(&jiffies_seq, seq)); *basej = basejiff; return basemono; } /** * tick_nohz_next_event() - return the clock monotonic based next event * @ts: pointer to tick_sched struct * @cpu: CPU number * * Return: * *%0 - When the next event is a maximum of TICK_NSEC in the future * and the tick is not stopped yet * *%next_event - Next event based on clock monotonic */ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; unsigned long basejiff; int tick_cpu; basemono = get_jiffies_update(&basejiff); ts->last_jiffies = basejiff; ts->timer_expires_base = basemono; /* * Keep the periodic tick, when RCU, architecture or irq_work * requests it. * Aside of that, check whether the local timer softirq is * pending. If so, its a bad idea to call get_next_timer_interrupt(), * because there is an already expired timer, so it will request * immediate expiry, which rearms the hardware timer with a * minimal delta, which brings us back to this place * immediately. Lather, rinse and repeat... */ if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* * Get the next pending timer. If high resolution * timers are enabled this only takes the timer wheel * timers into account. If high resolution timers are * disabled this also looks at the next expiring * hrtimer. */ next_tick = get_next_timer_interrupt(basejiff, basemono); ts->next_timer = next_tick; } /* Make sure next_tick is never before basemono! */ if (WARN_ON_ONCE(basemono > next_tick)) next_tick = basemono; /* * If the tick is due in the next period, keep it ticking or * force prod the timer. */ delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->timer_expires = 0; goto out; } } /* * If this CPU is the one which had the do_timer() duty last, we limit * the sleep time to the timekeeping 'max_deferment' value. * Otherwise we can sleep as long as we want. */ delta = timekeeping_max_deferment(); tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu != cpu && (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) delta = KTIME_MAX; /* Calculate the next expiry time */ if (delta < (KTIME_MAX - basemono)) expires = basemono + delta; else expires = KTIME_MAX; ts->timer_expires = min_t(u64, expires, next_tick); out: return ts->timer_expires; } static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED); int tick_cpu; u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ ts->timer_expires_base = 0; /* * Now the tick should be stopped definitely - so the timer base needs * to be marked idle as well to not miss a newly queued timer. */ expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle); if (expires > ts->timer_expires) { /* * This path could only happen when the first timer was removed * between calculating the possible sleep length and now (when * high resolution mode is not active, timer could also be a * hrtimer). * * We have to stick to the original calculated expiry value to * not stop the tick for too long with a shallow C-state (which * was programmed by cpuidle because of an early next expiration * value). */ expires = ts->timer_expires; } /* If the timer base is not idle, retain the not yet stopped tick. */ if (!timer_idle) return; /* * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs * the tick timer next, which might be this CPU as well. If we * don't drop this here, the jiffies might be stale and * do_timer() never gets invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. */ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu == cpu) { WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST); } else if (tick_cpu != TICK_DO_TIMER_NONE) { tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST); } /* Skip reprogram of event if it's not changed */ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) { /* Sanity check: make sure clockevent is actually programmed */ if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) return; WARN_ONCE(1, "basemono: %llu ts->next_tick: %llu dev->next_event: %llu " "timer->active: %d timer->expires: %llu\n", basemono, ts->next_tick, dev->next_event, hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer)); } /* * tick_nohz_stop_tick() can be called several times before * tick_nohz_restart_sched_tick() is called. This happens when * interrupts arrive which do not cause a reschedule. In the first * call we save the current tick time, so we can restart the * scheduler tick in tick_nohz_restart_sched_tick(). */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { calc_load_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); tick_sched_flag_set(ts, TS_FLAG_STOPPED); trace_tick_stop(1, TICK_DEP_MASK_NONE); } ts->next_tick = expires; /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. */ if (unlikely(expires == KTIME_MAX)) { if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) hrtimer_cancel(&ts->sched_timer); else tick_program_event(KTIME_MAX, 1); return; } if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED_HARD); } else { hrtimer_set_expires(&ts->sched_timer, expires); tick_program_event(expires, 1); } } static void tick_nohz_retain_tick(struct tick_sched *ts) { ts->timer_expires_base = 0; } #ifdef CONFIG_NO_HZ_FULL static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu) { if (tick_nohz_next_event(ts, cpu)) tick_nohz_stop_tick(ts, cpu); else tick_nohz_retain_tick(ts); } #endif /* CONFIG_NO_HZ_FULL */ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); /* * Clear the timer idle flag, so we avoid IPIs on remote queueing and * the clock forward checks in the enqueue path: */ timer_clear_idle(); calc_load_nohz_stop(); touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ tick_sched_flag_clear(ts, TS_FLAG_STOPPED); tick_nohz_restart(ts, now); } static void __tick_nohz_full_update_tick(struct tick_sched *ts, ktime_t now) { #ifdef CONFIG_NO_HZ_FULL int cpu = smp_processor_id(); if (can_stop_full_tick(cpu, ts)) tick_nohz_full_stop_tick(ts, cpu); else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_restart_sched_tick(ts, now); #endif } static void tick_nohz_full_update_tick(struct tick_sched *ts) { if (!tick_nohz_full_cpu(smp_processor_id())) return; if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return; __tick_nohz_full_update_tick(ts, ktime_get()); } /* * A pending softirq outside an IRQ (or softirq disabled section) context * should be waiting for ksoftirqd to handle it. Therefore we shouldn't * reach this code due to the need_resched() early check in can_stop_idle_tick(). * * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, * triggering the code below, since wakep_softirqd() is ignored. * */ static bool report_idle_softirq(void) { static int ratelimit; unsigned int pending = local_softirq_pending(); if (likely(!pending)) return false; /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ if (!cpu_active(smp_processor_id())) { pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; if (!pending) return false; } if (ratelimit >= 10) return false; /* On RT, softirq handling may be waiting on some lock */ if (local_bh_blocked()) return false; pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", pending); ratelimit++; return true; } static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { WARN_ON_ONCE(cpu_is_offline(cpu)); if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ))) return false; if (need_resched()) return false; if (unlikely(report_idle_softirq())) return false; if (tick_nohz_full_enabled()) { int tick_cpu = READ_ONCE(tick_do_timer_cpu); /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around */ if (tick_cpu == cpu) return false; /* Should not happen for nohz-full */ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) return false; } return true; } /** * tick_nohz_idle_stop_tick - stop the idle tick from the idle task * * When the next event is more than a tick into the future, stop the idle tick */ void tick_nohz_idle_stop_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); int cpu = smp_processor_id(); ktime_t expires; /* * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the * tick timer expiration time is known already. */ if (ts->timer_expires_base) expires = ts->timer_expires; else if (can_stop_idle_tick(cpu, ts)) expires = tick_nohz_next_event(ts, cpu); else return; ts->idle_calls++; if (expires > 0LL) { int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); tick_nohz_stop_tick(ts, cpu); ts->idle_sleeps++; ts->idle_expires = expires; if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->idle_jiffies = ts->last_jiffies; nohz_balance_enter_idle(cpu); } } else { tick_nohz_retain_tick(ts); } } void tick_nohz_idle_retain_tick(void) { tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); } /** * tick_nohz_idle_enter - prepare for entering idle on the current CPU * * Called when we start the idle loop. */ void tick_nohz_idle_enter(void) { struct tick_sched *ts; lockdep_assert_irqs_enabled(); local_irq_disable(); ts = this_cpu_ptr(&tick_cpu_sched); WARN_ON_ONCE(ts->timer_expires_base); tick_sched_flag_set(ts, TS_FLAG_INIDLE); tick_nohz_start_idle(ts); local_irq_enable(); } /** * tick_nohz_irq_exit - Notify the tick about IRQ exit * * A timer may have been added/modified/deleted either by the current IRQ, * or by another place using this IRQ as a notification. This IRQ may have * also updated the RCU callback list. These events may require a * re-evaluation of the next tick. Depending on the context: * * 1) If the CPU is idle and no resched is pending, just proceed with idle * time accounting. The next tick will be re-evaluated on the next idle * loop iteration. * * 2) If the CPU is nohz_full: * * 2.1) If there is any tick dependency, restart the tick if stopped. * * 2.2) If there is no tick dependency, (re-)evaluate the next tick and * stop/update it accordingly. */ void tick_nohz_irq_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) tick_nohz_start_idle