| 1 1 2 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Kone driver for Linux * * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ /* * Roccat Kone is a gamer mouse which consists of a mouse part and a keyboard * part. The keyboard part enables the mouse to execute stored macros with mixed * key- and button-events. * * TODO implement on-the-fly polling-rate change * The windows driver has the ability to change the polling rate of the * device on the press of a mousebutton. * Is it possible to remove and reinstall the urb in raw-event- or any * other handler, or to defer this action to be executed somewhere else? * * TODO is it possible to overwrite group for sysfs attributes via udev? */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-kone.h" static uint profile_numbers[5] = {0, 1, 2, 3, 4}; static void kone_profile_activated(struct kone_device *kone, uint new_profile) { kone->actual_profile = new_profile; kone->actual_dpi = kone->profiles[new_profile - 1].startup_dpi; } static void kone_profile_report(struct kone_device *kone, uint new_profile) { struct kone_roccat_report roccat_report; roccat_report.event = kone_mouse_event_switch_profile; roccat_report.value = new_profile; roccat_report.key = 0; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); } static int kone_receive(struct usb_device *usb_dev, uint usb_command, void *data, uint size) { char *buf; int len; buf = kmalloc(size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), HID_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT); memcpy(data, buf, size); kfree(buf); return ((len < 0) ? len : ((len != size) ? -EIO : 0)); } static int kone_send(struct usb_device *usb_dev, uint usb_command, void const *data, uint size) { char *buf; int len; buf = kmemdup(data, size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), HID_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT); kfree(buf); return ((len < 0) ? len : ((len != size) ? -EIO : 0)); } static void kone_set_settings_checksum(struct kone_settings *settings) { uint16_t checksum = 0; unsigned char *address = (unsigned char *)settings; int i; for (i = 0; i < sizeof(struct kone_settings) - 2; ++i, ++address) checksum += *address; settings->checksum = cpu_to_le16(checksum); } /* * Checks success after writing data to mouse * On success returns 0 * On failure returns errno */ static int kone_check_write(struct usb_device *usb_dev) { int retval; uint8_t data; do { /* * Mouse needs 50 msecs until it says ok, but there are * 30 more msecs needed for next write to work. */ msleep(80); retval = kone_receive(usb_dev, kone_command_confirm_write, &data, 1); if (retval) return retval; /* * value of 3 seems to mean something like * "not finished yet, but it looks good" * So check again after a moment. */ } while (data == 3); if (data == 1) /* everything alright */ return 0; /* unknown answer */ dev_err(&usb_dev->dev, "got retval %d when checking write\n", data); return -EIO; } /* * Reads settings from mouse and stores it in @buf * On success returns 0 * On failure returns errno */ static int kone_get_settings(struct usb_device *usb_dev, struct kone_settings *buf) { return kone_receive(usb_dev, kone_command_settings, buf, sizeof(struct kone_settings)); } /* * Writes settings from @buf to mouse * On success returns 0 * On failure returns errno */ static int kone_set_settings(struct usb_device *usb_dev, struct kone_settings const *settings) { int retval; retval = kone_send(usb_dev, kone_command_settings, settings, sizeof(struct kone_settings)); if (retval) return retval; return kone_check_write(usb_dev); } /* * Reads profile data from mouse and stores it in @buf * @number: profile number to read * On success returns 0 * On failure returns errno */ static int kone_get_profile(struct usb_device *usb_dev, struct kone_profile *buf, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), USB_REQ_CLEAR_FEATURE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, kone_command_profile, number, buf, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return -EIO; return 0; } /* * Writes profile data to mouse. * @number: profile number to write * On success returns 0 * On failure returns errno */ static int kone_set_profile(struct usb_device *usb_dev, struct kone_profile const *profile, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), USB_REQ_SET_CONFIGURATION, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, kone_command_profile, number, (void *)profile, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return len; if (kone_check_write(usb_dev)) return -EIO; return 0; } /* * Reads value of "fast-clip-weight" and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_weight(struct usb_device *usb_dev, int *result) { int retval; uint8_t data; retval = kone_receive(usb_dev, kone_command_weight, &data, 1); if (retval) return retval; *result = (int)data; return 0; } /* * Reads firmware_version of mouse and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_firmware_version(struct usb_device *usb_dev, int *result) { int retval; uint16_t data; retval = kone_receive(usb_dev, kone_command_firmware_version, &data, 2); if (retval) return retval; *result = le16_to_cpu(data); return 0; } static ssize_t kone_sysfs_read_settings(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_settings)) return 0; if (off + count > sizeof(struct kone_settings)) count = sizeof(struct kone_settings) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->settings) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* * Writing settings automatically activates startup_profile. * This function keeps values in kone_device up to date and assumes that in * case of error the old data is still valid */ static ssize_t kone_sysfs_write_settings(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval = 0, difference, old_profile; struct kone_settings *settings = (struct kone_settings *)buf; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_settings)) return -EINVAL; mutex_lock(&kone->kone_lock); difference = memcmp(settings, &kone->settings, sizeof(struct kone_settings)); if (difference) { if (settings->startup_profile < 1 || settings->startup_profile > 5) { retval = -EINVAL; goto unlock; } retval = kone_set_settings(usb_dev, settings); if (retval) goto unlock; old_profile = kone->settings.startup_profile; memcpy(&kone->settings, settings, sizeof(struct kone_settings)); kone_profile_activated(kone, kone->settings.startup_profile); if (kone->settings.startup_profile != old_profile) kone_profile_report(kone, kone->settings.startup_profile); } unlock: mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_settings); } static const BIN_ATTR(settings, 0660, kone_sysfs_read_settings, kone_sysfs_write_settings, sizeof(struct kone_settings)); static ssize_t kone_sysfs_read_profilex(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_profile)) return 0; if (off + count > sizeof(struct kone_profile)) count = sizeof(struct kone_profile) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->profiles[*(uint *)(attr->private)]) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* Writes data only if different to stored data */ static ssize_t kone_sysfs_write_profilex(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); struct kone_profile *profile; int retval = 0, difference; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_profile)) return -EINVAL; profile = &kone->profiles[*(uint *)(attr->private)]; mutex_lock(&kone->kone_lock); difference = memcmp(buf, profile, sizeof(struct kone_profile)); if (difference) { retval = kone_set_profile(usb_dev, (struct kone_profile const *)buf, *(uint *)(attr->private) + 1); if (!retval) memcpy(profile, buf, sizeof(struct kone_profile)); } mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_profile); } #define PROFILE_ATTR(number) \ static const struct bin_attribute bin_attr_profile##number = { \ .attr = { .name = "profile" #number, .mode = 0660 }, \ .size = sizeof(struct kone_profile), \ .read = kone_sysfs_read_profilex, \ .write = kone_sysfs_write_profilex, \ .private = &profile_numbers[number-1], \ } PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t kone_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->actual_profile); } static DEVICE_ATTR(actual_profile, 0440, kone_sysfs_show_actual_profile, NULL); static ssize_t kone_sysfs_show_actual_dpi(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->actual_dpi); } static DEVICE_ATTR(actual_dpi, 0440, kone_sysfs_show_actual_dpi, NULL); /* weight is read each time, since we don't get informed when it's changed */ static ssize_t kone_sysfs_show_weight(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone; struct usb_device *usb_dev; int weight = 0; int retval; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&kone->kone_lock); retval = kone_get_weight(usb_dev, &weight); mutex_unlock(&kone->kone_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", weight); } static DEVICE_ATTR(weight, 0440, kone_sysfs_show_weight, NULL); static ssize_t kone_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->firmware_version); } static DEVICE_ATTR(firmware_version, 0440, kone_sysfs_show_firmware_version, NULL); static ssize_t kone_sysfs_show_tcu(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->settings.tcu); } static int kone_tcu_command(struct usb_device *usb_dev, int number) { unsigned char value; value = number; return kone_send(usb_dev, kone_command_calibrate, &value, 1); } /* * Calibrating the tcu is the only action that changes settings data inside the * mouse, so this data needs to be reread */ static ssize_t kone_sysfs_set_tcu(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long state; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &state); if (retval) return retval; if (state != 0 && state != 1) return -EINVAL; mutex_lock(&kone->kone_lock); if (state == 1) { /* state activate */ retval = kone_tcu_command(usb_dev, 1); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 2); if (retval) goto exit_unlock; ssleep(5); /* tcu needs this time for calibration */ retval = kone_tcu_command(usb_dev, 3); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 0); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 4); if (retval) goto exit_unlock; /* * Kone needs this time to settle things. * Reading settings too early will result in invalid data. * Roccat's driver waits 1 sec, maybe this time could be * shortened. */ ssleep(1); } /* calibration changes values in settings, so reread */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; /* only write settings back if activation state is different */ if (kone->settings.tcu != state) { kone->settings.tcu = state; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { dev_err(&usb_dev->dev, "couldn't set tcu state\n"); /* * try to reread valid settings into buffer overwriting * first error code */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; goto exit_unlock; } /* calibration resets profile */ kone_profile_activated(kone, kone->settings.startup_profile); } retval = size; exit_no_settings: dev_err(&usb_dev->dev, "couldn't read settings\n"); exit_unlock: mutex_unlock(&kone->kone_lock); return retval; } static DEVICE_ATTR(tcu, 0660, kone_sysfs_show_tcu, kone_sysfs_set_tcu); static ssize_t kone_sysfs_show_startup_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->settings.startup_profile); } static ssize_t kone_sysfs_set_startup_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long new_startup_profile; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &new_startup_profile); if (retval) return retval; if (new_startup_profile < 1 || new_startup_profile > 5) return -EINVAL; mutex_lock(&kone->kone_lock); kone->settings.startup_profile = new_startup_profile; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { mutex_unlock(&kone->kone_lock); return retval; } /* changing the startup profile immediately activates this profile */ kone_profile_activated(kone, new_startup_profile); kone_profile_report(kone, new_startup_profile); mutex_unlock(&kone->kone_lock); return size; } static DEVICE_ATTR(startup_profile, 0660, kone_sysfs_show_startup_profile, kone_sysfs_set_startup_profile); static struct attribute *kone_attrs[] = { /* * Read actual dpi settings. * Returns raw value for further processing. Refer to enum * kone_polling_rates to get real value. */ &dev_attr_actual_dpi.attr, &dev_attr_actual_profile.attr, /* * The mouse can be equipped with one of four supplied weights from 5 * to 20 grams which are recognized and its value can be read out. * This returns the raw value reported by the mouse for easy evaluation * by software. Refer to enum kone_weights to get corresponding real * weight. */ &dev_attr_weight.attr, /* * Prints firmware version stored in mouse as integer. * The raw value reported by the mouse is returned for easy evaluation, * to get the real version number the decimal point has to be shifted 2 * positions to the left. E.g. a value of 138 means 1.38. */ &dev_attr_firmware_version.attr, /* * Prints state of Tracking Control Unit as number where 0 = off and * 1 = on. Writing 0 deactivates tcu and writing 1 calibrates and * activates the tcu */ &dev_attr_tcu.attr, /* Prints and takes the number of the profile the mouse starts with */ &dev_attr_startup_profile.attr, NULL, }; static const struct bin_attribute *const kone_bin_attributes[] = { &bin_attr_settings, &bin_attr_profile1, &bin_attr_profile2, &bin_attr_profile3, &bin_attr_profile4, &bin_attr_profile5, NULL, }; static const struct attribute_group kone_group = { .attrs = kone_attrs, .bin_attrs = kone_bin_attributes, }; static const struct attribute_group *kone_groups[] = { &kone_group, NULL, }; /* kone_class is used for creating sysfs attributes via roccat char device */ static const struct class kone_class = { .name = "kone", .dev_groups = kone_groups, }; static int kone_init_kone_device_struct(struct usb_device *usb_dev, struct kone_device *kone) { uint i; int retval; mutex_init(&kone->kone_lock); for (i = 0; i < 5; ++i) { retval = kone_get_profile(usb_dev, &kone->profiles[i], i + 1); if (retval) return retval; } retval = kone_get_settings(usb_dev, &kone->settings); if (retval) return retval; retval = kone_get_firmware_version(usb_dev, &kone->firmware_version); if (retval) return retval; kone_profile_activated(kone, kone->settings.startup_profile); return 0; } /* * Since IGNORE_MOUSE quirk moved to hid-apple, there is no way to bind only to * mousepart if usb_hid is compiled into the kernel and kone is compiled as * module. * Secial behaviour is bound only to mousepart since only mouseevents contain * additional notifications. */ static int kone_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct kone_device *kone; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = kzalloc(sizeof(*kone), GFP_KERNEL); if (!kone) return -ENOMEM; hid_set_drvdata(hdev, kone); retval = kone_init_kone_device_struct(usb_dev, kone); if (retval) { hid_err(hdev, "couldn't init struct kone_device\n"); goto exit_free; } retval = roccat_connect(&kone_class, hdev, sizeof(struct kone_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); /* be tolerant about not getting chrdev */ } else { kone->roccat_claimed = 1; kone->chrdev_minor = retval; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(kone); return retval; } static void kone_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct kone_device *kone; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = hid_get_drvdata(hdev); if (kone->roccat_claimed) roccat_disconnect(kone->chrdev_minor); kfree(hid_get_drvdata(hdev)); } } static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = kone_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void kone_remove(struct hid_device *hdev) { kone_remove_specials(hdev); hid_hw_stop(hdev); } /* handle special events and keep actual profile and dpi values up to date */ static void kone_keep_values_up_to_date(struct kone_device *kone, struct kone_mouse_event const *event) { switch (event->event) { case kone_mouse_event_switch_profile: kone->actual_dpi = kone->profiles[event->value - 1]. startup_dpi; fallthrough; case kone_mouse_event_osd_profile: kone->actual_profile = event->value; break; case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_dpi: kone->actual_dpi = event->value; break; } } static void kone_report_to_chrdev(struct kone_device const *kone, struct kone_mouse_event const *event) { struct kone_roccat_report roccat_report; switch (event->event) { case kone_mouse_event_switch_profile: case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_profile: case kone_mouse_event_osd_dpi: roccat_report.event = event->event; roccat_report.value = event->value; roccat_report.key = 0; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); break; case kone_mouse_event_call_overlong_macro: case kone_mouse_event_multimedia: if (event->value == kone_keystroke_action_press) { roccat_report.event = event->event; roccat_report.value = kone->actual_profile; roccat_report.key = event->macro_key; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); } break; } } /* * Is called for keyboard- and mousepart. * Only mousepart gets informations about special events in its extended event * structure. */ static int kone_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct kone_device *kone = hid_get_drvdata(hdev); struct kone_mouse_event *event = (struct kone_mouse_event *)data; /* keyboard events are always processed by default handler */ if (size != sizeof(struct kone_mouse_event)) return 0; if (kone == NULL) return 0; /* * Firmware 1.38 introduced new behaviour for tilt and special buttons. * Pressed button is reported in each movement event. * Workaround sends only one event per press. */ if (memcmp(&kone->last_mouse_event.tilt, &event->tilt, 5)) memcpy(&kone->last_mouse_event, event, sizeof(struct kone_mouse_event)); else memset(&event->wipe, 0, sizeof(event->wipe)); kone_keep_values_up_to_date(kone, event); if (kone->roccat_claimed) kone_report_to_chrdev(kone, event); return 0; /* always do further processing */ } static const struct hid_device_id kone_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) }, { } }; MODULE_DEVICE_TABLE(hid, kone_devices); static struct hid_driver kone_driver = { .name = "kone", .id_table = kone_devices, .probe = kone_probe, .remove = kone_remove, .raw_event = kone_raw_event }; static int __init kone_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&kone_class); if (retval) return retval; retval = hid_register_driver(&kone_driver); if (retval) class_unregister(&kone_class); return retval; } static void __exit kone_exit(void) { hid_unregister_driver(&kone_driver); class_unregister(&kone_class); } module_init(kone_init); module_exit(kone_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kone driver"); MODULE_LICENSE("GPL v2"); |
| 58 58 155 155 1257 1254 677 1 79 90 1152 1253 5 1257 487 481 880 27 54 25 380 379 379 379 380 1225 1225 1222 1226 8 1216 3 472 472 121 472 471 155 154 154 155 121 34 13 3 155 154 151 4 147 74 74 74 58 4 45 26 17 1 8 27 1 58 35 11 3 36 23 10 13 199 201 3 198 200 12 12 12 9 2 2 2 13 1 1 1 10 10 9 6 3 4 6 5 23 22 6 29 49 1 1 1 1 24 12 5 2 27 1 27 10 17 27 27 14 13 4 23 24 6 25 2 27 18 7 39 39 2 28 3 27 22 20 1 1 2 8 10 27 8 22 4 1 4 101 100 1 65 36 4 43 13 83 83 3 78 2 5 4 39 79 2 43 9 21 2 1 2 2 34 34 26 2 5 1 19 14 7 3 23 23 1 2 1 1 5 1 7 4 2 1 8 8 3 8 3 3 5 3 30 30 29 29 277 279 279 279 278 279 52 242 24 263 30 8 30 1 30 277 63 7 28 19 1 3 2 1 3 2039 2039 2037 1401 8 1208 43 3 9 1455 485 22 549 3 422 46 45 213 472 28 28 213 211 1 213 209 6 6 208 213 196 30 3 210 211 2 2 209 12 213 7 1 3 3 2 8 3 7 7 1 1 5 2 2 1 2 10 7 9 11 10 2 23 23 9 14 1 1 2 2 2 10 10 3 10 23 23 21 2 206 204 205 1944 1941 2043 2047 395 66 22 22 1 4 15 3 1 15 15 1 4 12 6 1422 1419 1423 1421 1420 1440 1 1 499 1269 1261 1260 1269 1261 1261 7 1422 1423 1423 2 9 6 18 11 7 2 2 1 2 2 2 5 3 2 5 14 11 1 2 12 12 12 2 12 5 1 4 12 1243 1243 1244 2 499 1253 10 1244 499 43 43 43 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 | // SPDX-License-Identifier: GPL-2.0-or-later /* * NET3 IP device support routines. * * Derived from the IP parts of dev.c 1.0.19 * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * * Additional Authors: * Alan Cox, <gw4pts@gw4pts.ampr.org> * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: * Alexey Kuznetsov: pa_* fields are replaced with ifaddr * lists. * Cyrus Durgin: updated for kmod * Matthias Andree: in devinet_ioctl, compare label and * address (4.4BSD alias style support), * fall back to comparing just the label * if no match found. */ #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include "igmp_internal.h" #include <linux/slab.h> #include <linux/hash.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/kmod.h> #include <linux/netconf.h> #include <net/arp.h> #include <net/ip.h> #include <net/route.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/addrconf.h> #define IPV6ONLY_FLAGS \ (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \ IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \ IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY) static struct ipv4_devconf ipv4_devconf = { .data = { [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; static struct ipv4_devconf ipv4_devconf_dflt = { .data = { [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1, }, }; #define IPV4_DEVCONF_DFLT(net, attr) \ IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, [IFA_FLAGS] = { .type = NLA_U32 }, [IFA_RT_PRIORITY] = { .type = NLA_U32 }, [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, [IFA_PROTO] = { .type = NLA_U8 }, }; #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = __ipv4_addr_hash(addr, net_hash_mix(net)); return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { u32 hash = inet_addr_hash(net, ifa->ifa_local); ASSERT_RTNL(); hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]); } static void inet_hash_remove(struct in_ifaddr *ifa) { ASSERT_RTNL(); hlist_del_init_rcu(&ifa->addr_lst); } /** * __ip_dev_find - find the first device with a given source address. * @net: the net namespace * @addr: the source address * @devref: if true, take a reference on the found device * * If a caller uses devref=false, it should be protected by RCU, or RTNL */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { struct net_device *result = NULL; struct in_ifaddr *ifa; rcu_read_lock(); ifa = inet_lookup_ifaddr_rcu(net, addr); if (!ifa) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res = { 0 }; struct fib_table *local; /* Fallback to FIB local table so that communication * over loopback subnets work. */ local = fib_get_table(net, RT_TABLE_LOCAL); if (local && !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && res.type == RTN_LOCAL) result = FIB_RES_DEV(res); } else { result = ifa->ifa_dev->dev; } if (result && devref) dev_hold(result); rcu_read_unlock(); return result; } EXPORT_SYMBOL(__ip_dev_find); /* called under RCU lock */ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) { u32 hash = inet_addr_hash(net, addr); struct in_ifaddr *ifa; hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst) if (ifa->ifa_local == addr) return ifa; return NULL; } static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr __rcu **ifap, int destroy); #ifdef CONFIG_SYSCTL static int devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct in_device *idev); #else static int devinet_sysctl_register(struct in_device *idev) { return 0; } static void devinet_sysctl_unregister(struct in_device *idev) { } #endif /* Locks all the inet devices. */ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev) { struct in_ifaddr *ifa; ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT); if (!ifa) return NULL; in_dev_hold(in_dev); ifa->ifa_dev = in_dev; INIT_HLIST_NODE(&ifa->addr_lst); return ifa; } static void inet_rcu_free_ifa(struct rcu_head *head) { struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); in_dev_put(ifa->ifa_dev); kfree(ifa); } static void inet_free_ifa(struct in_ifaddr *ifa) { /* Our reference to ifa->ifa_dev must be freed ASAP * to release the reference to the netdev the same way. * in_dev_put() -> in_dev_finish_destroy() -> netdev_put() */ call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa); } static void in_dev_free_rcu(struct rcu_head *head) { struct in_device *idev = container_of(head, struct in_device, rcu_head); kfree(rcu_dereference_protected(idev->mc_hash, 1)); kfree(idev); } void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif netdev_put(dev, &idev->dev_tracker); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else call_rcu(&idev->rcu_head, in_dev_free_rcu); } EXPORT_SYMBOL(in_dev_finish_destroy); static struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev; int err = -ENOMEM; ASSERT_RTNL(); in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) netif_disable_lro(dev); /* Reference in_dev->dev */ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); if (dev != blackhole_netdev) { err = devinet_sysctl_register(in_dev); if (err) { in_dev->dead = 1; neigh_parms_release(&arp_tbl, in_dev->arp_parms); in_dev_put(in_dev); in_dev = NULL; goto out; } ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) ip_mc_up(in_dev); } /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); out: return in_dev ?: ERR_PTR(err); out_kfree: kfree(in_dev); in_dev = NULL; goto out; } static void inetdev_destroy(struct in_device *in_dev) { struct net_device *dev; struct in_ifaddr *ifa; ASSERT_RTNL(); dev = in_dev->dev; in_dev->dead = 1; ip_mc_destroy_dev(in_dev); while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); inet_free_ifa(ifa); } RCU_INIT_POINTER(dev->ip_ptr, NULL); devinet_sysctl_unregister(in_dev); neigh_parms_release(&arp_tbl, in_dev->arp_parms); arp_ifdown(dev); in_dev_put(in_dev); } static int __init inet_blackhole_dev_init(void) { struct in_device *in_dev; rtnl_lock(); in_dev = inetdev_init(blackhole_netdev); rtnl_unlock(); return PTR_ERR_OR_ZERO(in_dev); } late_initcall(inet_blackhole_dev_init); int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; rcu_read_lock(); in_dev_for_each_ifa_rcu(ifa, in_dev) { if (inet_ifa_match(a, ifa)) { if (!b || inet_ifa_match(b, ifa)) { rcu_read_unlock(); return 1; } } } rcu_read_unlock(); return 0; } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr __rcu **ifap, int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1; struct in_ifaddr __rcu **last_prim; struct in_ifaddr *prev_prom = NULL; int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); ASSERT_RTNL(); ifa1 = rtnl_dereference(*ifap); last_prim = ifap; if (in_dev->dead) goto no_promotions; /* 1. Deleting primary ifaddr forces deletion all secondaries * unless alias promotion is set **/ if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next; while ((ifa = rtnl_dereference(*ifap1)) != NULL) { if (!(ifa->ifa_flags & IFA_F_SECONDARY) && ifa1->ifa_scope <= ifa->ifa_scope) last_prim = &ifa->ifa_next; if (!(ifa->ifa_flags & IFA_F_SECONDARY) || ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) { ifap1 = &ifa->ifa_next; prev_prom = ifa; continue; } if (!do_promote) { inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); } else { promote = ifa; break; } } } /* On promotion all secondaries from subnet are changing * the primary IP, we must remove all their routes silently * and later to add them back with new prefsrc. Do this * while all addresses are on the device list. */ for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) fib_del_ifaddr(ifa, ifa1); } no_promotions: /* 2. Unlink it */ *ifap = ifa1->ifa_next; inet_hash_remove(ifa1); /* 3. Announce address deletion */ /* Send message first, then call notifier. At first sight, FIB update triggered by notifier will refer to already deleted ifaddr, that could confuse netlink listeners. It is not true: look, gated sees that route deleted and if it still thinks that ifaddr is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { struct in_ifaddr *next_sec; next_sec = rtnl_dereference(promote->ifa_next); if (prev_prom) { struct in_ifaddr *last_sec; rcu_assign_pointer(prev_prom->ifa_next, next_sec); last_sec = rtnl_dereference(*last_prim); rcu_assign_pointer(promote->ifa_next, last_sec); rcu_assign_pointer(*last_prim, promote); } promote->ifa_flags &= ~IFA_F_SECONDARY; rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) continue; fib_add_ifaddr(ifa); } } if (destroy) inet_free_ifa(ifa1); } static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr __rcu **ifap, int destroy) { __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; struct net *net = dev_net(in_dev->dev); struct in_validator_info ivi; struct in_ifaddr *ifa1; int ret; ASSERT_RTNL(); ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; /* Don't set IPv6 only flags to IPv4 addresses */ ifa->ifa_flags &= ~IPV6ONLY_FLAGS; ifap = &in_dev->ifa_list; ifa1 = rtnl_dereference(*ifap); while (ifa1) { if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) last_primary = &ifa1->ifa_next; if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { if (ifa1->ifa_local == ifa->ifa_local) { inet_free_ifa(ifa); return -EEXIST; } if (ifa1->ifa_scope != ifa->ifa_scope) { NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value"); inet_free_ifa(ifa); return -EINVAL; } ifa->ifa_flags |= IFA_F_SECONDARY; } ifap = &ifa1->ifa_next; ifa1 = rtnl_dereference(*ifap); } /* Allow any devices that wish to register ifaddr validtors to weigh * in now, before changes are committed. The rntl lock is serializing * access here, so the state should not change between a validator call * and a final notify on commit. This isn't invoked on promotion under * the assumption that validators are checking the address itself, and * not the flags. */ ivi.ivi_addr = ifa->ifa_address; ivi.ivi_dev = ifa->ifa_dev; ivi.extack = extack; ret = blocking_notifier_call_chain(&inetaddr_validator_chain, NETDEV_UP, &ivi); ret = notifier_to_errno(ret); if (ret) { inet_free_ifa(ifa); return ret; } if (!(ifa->ifa_flags & IFA_F_SECONDARY)) ifap = last_primary; rcu_assign_pointer(ifa->ifa_next, *ifap); rcu_assign_pointer(*ifap, ifa); inet_hash_insert(dev_net(in_dev->dev), ifa); cancel_delayed_work(&net->ipv4.addr_chk_work); queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0); /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } static int inet_insert_ifa(struct in_ifaddr *ifa) { if (!ifa->ifa_local) { inet_free_ifa(ifa); return 0; } return __inet_insert_ifa(ifa, NULL, 0, NULL); } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl_net(dev); ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); if (ipv4_is_loopback(ifa->ifa_local)) ifa->ifa_scope = RT_SCOPE_HOST; return inet_insert_ifa(ifa); } /* Caller must hold RCU or RTNL : * We dont take a reference on found in_device */ struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) in_dev = rcu_dereference_rtnl(dev->ip_ptr); rcu_read_unlock(); return in_dev; } EXPORT_SYMBOL(inetdev_by_index); /* Called only from RTNL semaphored context. No locks. */ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask) { struct in_ifaddr *ifa; ASSERT_RTNL(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) return ifa; } return NULL; } static int ip_mc_autojoin_config(struct net *net, bool join, const struct in_ifaddr *ifa) { #if defined(CONFIG_IP_MULTICAST) struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ifa->ifa_address, .imr_ifindex = ifa->ifa_dev->dev->ifindex, }; struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; ASSERT_RTNL_NET(net); lock_sock(sk); if (join) ret = ip_mc_join_group(sk, &mreq); else ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); return ret; #else return -EOPNOTSUPP; #endif } static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct in_ifaddr __rcu **ifap; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; struct in_ifaddr *ifa; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) goto out; ifm = nlmsg_data(nlh); rtnl_net_lock(net); in_dev = inetdev_by_index(net, ifm->ifa_index); if (!in_dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); err = -ENODEV; goto unlock; } for (ifap = &in_dev->ifa_list; (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) continue; if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) continue; if (tb[IFA_ADDRESS] && (ifm->ifa_prefixlen != ifa->ifa_prefixlen || !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) continue; if (ipv4_is_multicast(ifa->ifa_address)) ip_mc_autojoin_config(net, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); goto unlock; } NL_SET_ERR_MSG(extack, "ipv4: Address not found"); err = -EADDRNOTAVAIL; unlock: rtnl_net_unlock(net); out: return err; } static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; struct hlist_node *n; struct net *net; int i; net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); for (i = 0; i < IN4_ADDR_HSIZE; i++) { struct hlist_head *head = &net->ipv4.inet_addr_lst[i]; bool change_needed = false; rcu_read_lock(); hlist_for_each_entry_rcu(ifa, head, addr_lst) { unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; u32 flags; flags = READ_ONCE(ifa->ifa_flags); if (flags & IFA_F_PERMANENT) continue; preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); valid_lft = READ_ONCE(ifa->ifa_valid_lft); tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ age = (now - tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (valid_lft != INFINITY_LIFE_TIME && age >= valid_lft) { change_needed = true; } else if (preferred_lft == INFINITY_LIFE_TIME) { continue; } else if (age >= preferred_lft) { if (time_before(tstamp + valid_lft * HZ, next)) next = tstamp + valid_lft * HZ; if (!(flags & IFA_F_DEPRECATED)) change_needed = true; } else if (time_before(tstamp + preferred_lft * HZ, next)) { next = tstamp + preferred_lft * HZ; } } rcu_read_unlock(); if (!change_needed) continue; rtnl_net_lock(net); hlist_for_each_entry_safe(ifa, n, head, addr_lst) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) continue; /* We try to batch several events at once. */ age = (now - ifa->ifa_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { struct in_ifaddr __rcu **ifap; struct in_ifaddr *tmp; ifap = &ifa->ifa_dev->ifa_list; tmp = rtnl_net_dereference(net, *ifap); while (tmp) { if (tmp == ifa) { inet_del_ifa(ifa->ifa_dev, ifap, 1); break; } ifap = &tmp->ifa_next; tmp = rtnl_net_dereference(net, *ifap); } } else if (ifa->ifa_preferred_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_preferred_lft && !(ifa->ifa_flags & IFA_F_DEPRECATED)) { ifa->ifa_flags |= IFA_F_DEPRECATED; rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } rtnl_net_unlock(net); } next_sec = round_jiffies_up(next); next_sched = next; /* If rounded timeout is accurate enough, accept it. */ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) next_sched = next_sec; now = jiffies; /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, __u32 prefered_lft) { unsigned long timeout; u32 flags; flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) WRITE_ONCE(ifa->ifa_valid_lft, timeout); else flags |= IFA_F_PERMANENT; timeout = addrconf_timeout_fixup(prefered_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) flags |= IFA_F_DEPRECATED; WRITE_ONCE(ifa->ifa_preferred_lft, timeout); } WRITE_ONCE(ifa->ifa_flags, flags); WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack, __u32 *valid_lft, __u32 *prefered_lft) { struct ifaddrmsg *ifm = nlmsg_data(nlh); int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) return err; if (ifm->ifa_prefixlen > 32) { NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length"); return -EINVAL; } if (!tb[IFA_LOCAL]) { NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied"); return -EINVAL; } if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); return -EINVAL; } *valid_lft = ci->ifa_valid; *prefered_lft = ci->ifa_prefered; } return 0; } static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct ifaddrmsg *ifm = nlmsg_data(nlh); struct in_device *in_dev; struct net_device *dev; struct in_ifaddr *ifa; int err; dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; if (!dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); goto errout; } in_dev = __in_dev_get_rtnl_net(dev); err = -ENOBUFS; if (!in_dev) goto errout; ifa = inet_alloc_ifa(in_dev); if (!ifa) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. */ goto errout; ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); if (!tb[IFA_ADDRESS]) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); if (tb[IFA_LABEL]) nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (tb[IFA_RT_PRIORITY]) ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); if (tb[IFA_PROTO]) ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); return ifa; errout: return ERR_PTR(err); } static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1; in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) return ifa1; } return NULL; } static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { __u32 prefered_lft = INFINITY_LIFE_TIME; __u32 valid_lft = INFINITY_LIFE_TIME; struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa_existing; struct nlattr *tb[IFA_MAX + 1]; struct in_ifaddr *ifa; int ret; ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft); if (ret < 0) return ret; if (!nla_get_in_addr(tb[IFA_LOCAL])) return 0; rtnl_net_lock(net); ifa = inet_rtm_to_ifa(net, nlh, tb, extack); if (IS_ERR(ifa)) { ret = PTR_ERR(ifa); goto unlock; } ifa_existing = find_matching_ifa(net, ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); goto unlock; } } ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack); } else { u32 new_metric = ifa->ifa_rt_priority; u8 new_proto = ifa->ifa_proto; inet_free_ifa(ifa); if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "ipv4: Address already assigned"); ret = -EEXIST; goto unlock; } ifa = ifa_existing; if (ifa->ifa_rt_priority != new_metric) { fib_modify_prefix_metric(ifa, new_metric); ifa->ifa_rt_priority = new_metric; } ifa->ifa_proto = new_proto; set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&net->ipv4.addr_chk_work); queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } unlock: rtnl_net_unlock(net); return ret; } /* * Determine a default network mask, based on the IP address. */ static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. */ if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) rc = 0; else { __u32 haddr = ntohl(addr); if (IN_CLASSA(haddr)) rc = 8; else if (IN_CLASSB(haddr)) rc = 16; else if (IN_CLASSC(haddr)) rc = 24; else if (IN_CLASSE(haddr)) rc = 32; } return rc; } int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { struct sockaddr_in sin_orig; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; struct in_ifaddr __rcu **ifap = NULL; struct in_device *in_dev; struct in_ifaddr *ifa = NULL; struct net_device *dev; char *colon; int ret = -EFAULT; int tryaddrmatch = 0; ifr->ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; dev_load(net, ifr->ifr_name); switch (cmd) { case SIOCGIFADDR: /* Get interface address */ case SIOCGIFBRDADDR: /* Get the broadcast address */ case SIOCGIFDSTADDR: /* Get the destination address */ case SIOCGIFNETMASK: /* Get the netmask for the interface */ /* Note that these ioctls will not sleep, so that we do not impose a lock. One day we will be forced to put shlock here (I mean SMP) */ tryaddrmatch = (sin_orig.sin_family == AF_INET); memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; break; case SIOCSIFFLAGS: ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; break; case SIOCSIFADDR: /* Set interface address (and family) */ case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; ret = -EINVAL; if (sin->sin_family != AF_INET) goto out; break; default: ret = -EINVAL; goto out; } rtnl_net_lock(net); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; if (colon) *colon = ':'; in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ /* note: we only do this for a limited set of ioctls and only if the original address family was AF_INET. This is checked above. */ for (ifap = &in_dev->ifa_list; (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_local) { break; /* found */ } } } /* we didn't get a match, maybe the application is 4.3BSD-style and passed in junk so we fall back to comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; } } ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; switch (cmd) { case SIOCGIFADDR: /* Get interface address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_local; break; case SIOCGIFBRDADDR: /* Get the broadcast address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_broadcast; break; case SIOCGIFDSTADDR: /* Get the destination address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_address; break; case SIOCGIFNETMASK: /* Get the netmask for the interface */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_mask; break; case SIOCSIFFLAGS: if (colon) { ret = -EADDRNOTAVAIL; if (!ifa) break; ret = 0; if (!(ifr->ifr_flags & IFF_UP)) inet_del_ifa(in_dev, ifap, 1); break; } /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */ ASSERT_RTNL(); ret = dev_change_flags(dev, ifr->ifr_flags, NULL); break; case SIOCSIFADDR: /* Set interface address (and family) */ ret = -EINVAL; if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; if (!ifa) { ret = -ENOBUFS; if (!in_dev) break; ifa = inet_alloc_ifa(in_dev); if (!ifa) break; if (colon) memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { ret = 0; if (ifa->ifa_local == sin->sin_addr.s_addr) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; ifa->ifa_scope = 0; } ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; if (!(dev->flags & IFF_POINTOPOINT)) { ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); if ((dev->flags & IFF_BROADCAST) && ifa->ifa_prefixlen < 31) ifa->ifa_broadcast = ifa->ifa_address | ~ifa->ifa_mask; } else { ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; case SIOCSIFBRDADDR: /* Set the broadcast address */ ret = 0; if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = sin->sin_addr.s_addr; inet_insert_ifa(ifa); } break; case SIOCSIFDSTADDR: /* Set the destination address */ ret = 0; if (ifa->ifa_address == sin->sin_addr.s_addr) break; ret = -EINVAL; if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; ret = 0; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_address = sin->sin_addr.s_addr; inet_insert_ifa(ifa); break; case SIOCSIFNETMASK: /* Set the netmask for the interface */ /* * The mask we set must be legal. */ ret = -EINVAL; if (bad_mask(sin->sin_addr.s_addr, 0)) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { __be32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); /* See if current broadcast address matches * with current netmask, then recalculate * the broadcast address. Otherwise it's a * funny address, so don't touch it since * the user seems to know what (s)he's doing... */ if ((dev->flags & IFF_BROADCAST) && (ifa->ifa_prefixlen < 31) && (ifa->ifa_broadcast == (ifa->ifa_local|~old_mask))) { ifa->ifa_broadcast = (ifa->ifa_local | ~sin->sin_addr.s_addr); } inet_insert_ifa(ifa); } break; } done: rtnl_net_unlock(net); out: return ret; } int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl_net(dev); const struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; if (WARN_ON(size > sizeof(struct ifreq))) goto out; if (!in_dev) goto out; in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) { if (!buf) { done += size; continue; } if (len < size) break; memset(&ifr, 0, sizeof(struct ifreq)); strcpy(ifr.ifr_name, ifa->ifa_label); (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; if (copy_to_user(buf + done, &ifr, size)) { done = -EFAULT; break; } len -= size; done += size; } out: return done; } static __be32 in_dev_select_addr(const struct in_device *in_dev, int scope) { const struct in_ifaddr *ifa; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) return ifa->ifa_local; } return 0; } __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { const struct in_ifaddr *ifa; __be32 addr = 0; unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; struct net *net; int master_idx; rcu_read_lock(); net = dev_net_rcu(dev); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (min(ifa->ifa_scope, localnet_scope) > scope) continue; if (!dst || inet_ifa_match(dst, ifa)) { addr = ifa->ifa_local; break; } if (!addr) addr = ifa->ifa_local; } if (addr) goto out_unlock; no_in_dev: master_idx = l3mdev_master_ifindex_rcu(dev); /* For VRFs, the VRF device takes the place of the loopback device, * with addresses on it being preferred. Note in such cases the * loopback device will be among the devices that fail the master_idx * equality check in the loop below. */ if (master_idx && (dev = dev_get_by_index_rcu(net, master_idx)) && (in_dev = __in_dev_get_rcu(dev))) { addr = in_dev_select_addr(in_dev, scope); if (addr) goto out_unlock; } /* Not loopback addresses on loopback should be preferred in this case. It is important that lo is the first interface in dev_base list. */ for_each_netdev_rcu(net, dev) { if (l3mdev_master_ifindex_rcu(dev) != master_idx) continue; in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; addr = in_dev_select_addr(in_dev, scope); if (addr) goto out_unlock; } out_unlock: rcu_read_unlock(); return addr; } EXPORT_SYMBOL(inet_select_addr); static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) { unsigned char localnet_scope = RT_SCOPE_HOST; const struct in_ifaddr *ifa; __be32 addr = 0; int same = 0; if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { unsigned char min_scope = min(ifa->ifa_scope, localnet_scope); if (!addr && (local == ifa->ifa_local || !local) && min_scope <= scope) { addr = ifa->ifa_local; if (same) break; } if (!same) { same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa)); if (same && addr) { if (local || !dst) break; /* Is the selected addr into dst subnet? */ if (inet_ifa_match(addr, ifa)) break; /* No, then can we use new local src? */ if (min_scope <= scope) { addr = ifa->ifa_local; break; } /* search for large dst subnet for addr */ same = 0; } } } return same ? addr : 0; } /* * Confirm that local IP address exists using wildcards: * - net: netns to check, cannot be NULL * - in_dev: only on this interface, NULL=any interface * - dst: only in the same subnet as dst, 0=any dst * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct net_device *dev; if (in_dev) return confirm_addr_indev(in_dev, dst, local, scope); rcu_read_lock(); for_each_netdev_rcu(net, dev) { in_dev = __in_dev_get_rcu(dev); if (in_dev) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; } } rcu_read_unlock(); return addr; } EXPORT_SYMBOL(inet_confirm_addr); /* * Device notifier */ int register_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inetaddr_chain, nb); } EXPORT_SYMBOL(register_inetaddr_notifier); int unregister_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inetaddr_chain, nb); } EXPORT_SYMBOL(unregister_inetaddr_notifier); int register_inetaddr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inetaddr_validator_chain, nb); } EXPORT_SYMBOL(register_inetaddr_validator_notifier); int unregister_inetaddr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inetaddr_validator_chain, nb); } EXPORT_SYMBOL(unregister_inetaddr_validator_notifier); /* Rename ifa_labels for a device name change. Make some effort to preserve * existing alias numbering and to create unique labels if possible. */ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) { struct in_ifaddr *ifa; int named = 0; in_dev_for_each_ifa_rtnl(ifa, in_dev) { char old[IFNAMSIZ], *dot; memcpy(old, ifa->ifa_label, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (named++ == 0) goto skip; dot = strchr(old, ':'); if (!dot) { sprintf(old, ":%d", named); dot = old; } if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) strcat(ifa->ifa_label, dot); else strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); skip: rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) { const struct in_ifaddr *ifa; in_dev_for_each_ifa_rtnl(ifa, in_dev) { arp_send(ARPOP_REQUEST, ETH_P_ARP, ifa->ifa_local, dev, ifa->ifa_local, NULL, dev->dev_addr, NULL); } } /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); if (IS_ERR(in_dev)) return notifier_from_errno(PTR_ERR(in_dev)); if (dev->flags & IFF_LOOPBACK) { IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } } else if (event == NETDEV_CHANGEMTU) { /* Re-enabling IP */ if (inetdev_valid_mtu(dev->mtu)) in_dev = inetdev_init(dev); } goto out; } switch (event) { case NETDEV_REGISTER: pr_debug("%s: bug\n", __func__); RCU_INIT_POINTER(dev->ip_ptr, NULL); break; case NETDEV_UP: if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa = inet_alloc_ifa(in_dev); if (ifa) { ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; ifa->ifa_mask = inet_make_mask(8); ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); inet_insert_ifa(ifa); } } ip_mc_up(in_dev); fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); break; case NETDEV_DOWN: ip_mc_down(in_dev); break; case NETDEV_PRE_TYPE_CHANGE: ip_mc_unmap(in_dev); break; case NETDEV_POST_TYPE_CHANGE: ip_mc_remap(in_dev); break; case NETDEV_CHANGEMTU: if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; case NETDEV_CHANGENAME: /* Do not notify about label change, this event is * not interesting to applications using netlink. */ inetdev_changename(dev, in_dev); devinet_sysctl_unregister(in_dev); devinet_sysctl_register(in_dev); break; } out: return NOTIFY_DONE; } static struct notifier_block ip_netdev_notifier = { .notifier_call = inetdev_event, }; static size_t inet_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(4) /* IFA_FLAGS */ + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, unsigned long tstamp, u32 preferred, u32 valid) { struct ifa_cacheinfo ci; ci.cstamp = cstamp_delta(cstamp); ci.tstamp = cstamp_delta(tstamp); ci.ifa_prefered = preferred; ci.ifa_valid = valid; return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct inet_fill_args *args) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; unsigned long tstamp; u32 preferred, valid; u32 flags; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), args->flags); if (!nlh) return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; flags = READ_ONCE(ifa->ifa_flags); /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits. * The 32bit value is given in IFA_FLAGS attribute. */ ifm->ifa_flags = (__u8)flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; if (args->netnsid >= 0 && nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto nla_put_failure; tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(flags & IFA_F_PERMANENT)) { preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - tstamp) / HZ; if (preferred > tval) preferred -= tval; else preferred = 0; if (valid != INFINITY_LIFE_TIME) { if (valid > tval) valid -= tval; else valid = 0; } } } else { preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } if ((ifa->ifa_address && nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_broadcast && nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || nla_put_u32(skb, IFA_FLAGS, flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, preferred, valid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, struct inet_fill_args *fillargs, struct net **tgt_net, struct sock *sk, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[IFA_MAX+1]; struct ifaddrmsg *ifm; int err, i; ifm = nlmsg_payload(nlh, sizeof(*ifm)); if (!ifm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request"); return -EINVAL; } if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request"); return -EINVAL; } fillargs->ifindex = ifm->ifa_index; if (fillargs->ifindex) { cb->answer_flags |= NLM_F_DUMP_FILTERED; fillargs->flags |= NLM_F_DUMP_FILTERED; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) return err; for (i = 0; i <= IFA_MAX; ++i) { if (!tb[i]) continue; if (i == IFA_TARGET_NETNSID) { struct net *net; fillargs->netnsid = nla_get_s32(tb[i]); net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { fillargs->netnsid = -1; NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id"); return PTR_ERR(net); } *tgt_net = net; } else { NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request"); return -EINVAL; } } return 0; } static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb, struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { struct ip_mc_list *im; int ip_idx = 0; int err; for (im = rcu_dereference(in_dev->mc_list); im; im = rcu_dereference(im->next_rcu)) { if (ip_idx < *s_ip_idx) { ip_idx++; continue; } err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs); if (err < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ip_idx++; } err = 0; ip_idx = 0; done: *s_ip_idx = ip_idx; return err; } static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb, struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; int err; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (ip_idx < *s_ip_idx) { ip_idx++; continue; } err = inet_fill_ifaddr(skb, ifa, fillargs); if (err < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ip_idx++; } err = 0; ip_idx = 0; done: *s_ip_idx = ip_idx; return err; } static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { switch (fillargs->event) { case RTM_NEWADDR: return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs); case RTM_GETMULTICAST: return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx, fillargs); default: return -EINVAL; } } /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv4.dev_addr_genid) + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. */ if (!res) res = 0x80000000; return res; } static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, int event) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, .event = event, .flags = NLM_F_MULTI, .netnsid = -1, }; struct net *net = sock_net(skb->sk); struct net *tgt_net = net; struct { unsigned long ifindex; int ip_idx; } *ctx = (void *)cb->ctx; struct in_device *in_dev; struct net_device *dev; int err = 0; rcu_read_lock(); if (cb->strict_check) { err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) goto done; if (fillargs.ifindex) { dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); if (!dev) { err = -ENODEV; goto done; } in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto done; err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, &fillargs); goto done; } } cb->seq = inet_base_seq(tgt_net); for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, &fillargs); if (err < 0) goto done; } done: if (fillargs.netnsid >= 0) put_net(tgt_net); rcu_read_unlock(); return err; } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { return inet_dump_addr(skb, cb, RTM_NEWADDR); } static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { return inet_dump_addr(skb, cb, RTM_GETMULTICAST); } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { struct inet_fill_args fillargs = { .portid = portid, .seq = nlh ? nlh->nlmsg_seq : 0, .event = event, .flags = 0, .netnsid = -1, }; struct sk_buff *skb; int err = -ENOBUFS; struct net *net; net = dev_net(ifa->ifa_dev->dev); skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (!skb) goto errout; err = inet_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } static size_t inet_get_link_af_size(const struct net_device *dev, u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); if (!in_dev) return 0; return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ } static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); struct nlattr *nla; int i; if (!in_dev) return -ENODATA; nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); if (!nla) return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]); return 0; } static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { [IFLA_INET_CONF] = { .type = NLA_NESTED }, }; static int inet_validate_link_af(const struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, inet_af_policy, extack); if (err < 0) return err; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { int cfgid = nla_type(a); if (nla_len(a) < 4) return -EINVAL; if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) return -EINVAL; } } return 0; } static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem; if (!in_dev) return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) return -EINVAL; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); } return 0; } static int inet_netconf_msgsize_devconf(int type) { int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + nla_total_size(4); /* NETCONFA_IFINDEX */ bool all = false; if (type == NETCONFA_ALL) all = true; if (all || type == NETCONFA_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_RP_FILTER) size += nla_total_size(4); if (all || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_BC_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) size += nla_total_size(4); return size; } static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, const struct ipv4_devconf *devconf, u32 portid, u32 seq, int event, unsigned int flags, int type) { struct nlmsghdr *nlh; struct netconfmsg *ncm; bool all = false; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); if (!nlh) return -EMSGSIZE; if (type == NETCONFA_ALL) all = true; ncm = nlmsg_data(nlh); ncm->ncm_family = AF_INET; if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; if (!devconf) goto out; if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_RP_FILTER) && nla_put_s32(skb, NETCONFA_RP_FILTER, IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_BC_FORWARDING) && nla_put_s32(skb, NETCONFA_BC_FORWARDING, IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, IPV4_DEVCONF_RO(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; out: nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL); if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); } static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_valid_get_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv4_policy, extack); err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv4_policy, extack); if (err) return err; for (i = 0; i <= NETCONFA_MAX; i++) { if (!tb[i]) continue; switch (i) { case NETCONFA_IFINDEX: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request"); return -EINVAL; } } return 0; } static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; const struct ipv4_devconf *devconf; struct in_device *in_dev = NULL; struct net_device *dev = NULL; struct sk_buff *skb; int ifindex; int err; err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err) return err; if (!tb[NETCONFA_IFINDEX]) return -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { case NETCONFA_IFINDEX_ALL: devconf = net->ipv4.devconf_all; break; case NETCONFA_IFINDEX_DEFAULT: devconf = net->ipv4.devconf_dflt; break; default: err = -ENODEV; dev = dev_get_by_index(net, ifindex); if (dev) in_dev = in_dev_get(dev); if (!in_dev) goto errout; devconf = &in_dev->cnf; break; } err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, 0, NETCONFA_ALL); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: if (in_dev) in_dev_put(in_dev); dev_put(dev); return err; } static int inet_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct { unsigned long ifindex; unsigned int all_default; } *ctx = (void *)cb->ctx; const struct in_device *in_dev; struct net_device *dev; int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; struct netconfmsg *ncm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ncm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request"); return -EINVAL; } } rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; err = inet_netconf_fill_devconf(skb, dev->ifindex, &in_dev->cnf, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; } if (ctx->all_default == 0) { err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } if (ctx->all_default == 1) { err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } done: rcu_read_unlock(); return err; } #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(net, dev) { struct in_device *in_dev; in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; } rcu_read_unlock(); } /* called with RTNL locked */ static void inet_forward_change(struct net *net) { struct net_device *dev; int on = IPV4_DEVCONF_ALL(net, FORWARDING); IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; if (on) dev_disable_lro(dev); in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &in_dev->cnf); } } } static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) { if (cnf == net->ipv4.devconf_dflt) return NETCONFA_IFINDEX_DEFAULT; else if (cnf == net->ipv4.devconf_all) return NETCONFA_IFINDEX_ALL; else { struct in_device *idev = container_of(cnf, struct in_device, cnf); return idev->dev->ifindex; } } static int devinet_conf_proc(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; int ifindex; set_bit(i, cnf->state); if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); if (i == IPV4_DEVCONF_BC_FORWARDING - 1 && new_value != old_value) rt_cache_flush(net); if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_RP_FILTER, ifindex, cnf); } if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, ifindex, cnf); } if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, ifindex, cnf); } } return ret; } static int devinet_sysctl_forward(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct net *net = ctl->extra2; int ret; if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { if (!rtnl_net_trylock(net)) { /* Restore the original values before restarting */ *valp = val; *ppos = pos; return restart_syscall(); } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); if (*valp) dev_disable_lro(idev->dev); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, idev->dev->ifindex, cnf); } rtnl_net_unlock(net); rt_cache_flush(net); } else inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); } return ret; } static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); struct net *net = ctl->extra2; if (write && *valp != val) rt_cache_flush(net); return ret; } #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ { \ .procname = name, \ .data = ipv4_devconf.data + \ IPV4_DEVCONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ .extra1 = &ipv4_devconf, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table devinet_vars[IPV4_DEVCONF_MAX]; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", devinet_sysctl_forward), DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER, "arp_evict_nocarrier"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, "force_igmp_version"), DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, "ignore_routes_with_linkdown"), DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, "drop_gratuitous_arp"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, "route_localnet"), DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, "drop_unicast_in_l2_multicast"), }, }; static int __devinet_sysctl_register(struct net *net, char *dev_name, int ifindex, struct ipv4_devconf *p) { int i; struct devinet_sysctl_table *t; char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto out; for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].extra1 = p; t->devinet_vars[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); if (!t->sysctl_header) goto free; p->sysctl = t; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, ifindex, p); return 0; free: kfree(t); out: return -ENOMEM; } static void __devinet_sysctl_unregister(struct net *net, struct ipv4_devconf *cnf, int ifindex) { struct devinet_sysctl_table *t = cnf->sysctl; if (t) { cnf->sysctl = NULL; unregister_net_sysctl_table(t->sysctl_header); kfree(t); } inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int devinet_sysctl_register(struct in_device *idev) { int err; if (!sysctl_dev_name_is_allowed(idev->dev->name)) return -EINVAL; err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); if (err) return err; err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, &idev->cnf); if (err) neigh_sysctl_unregister(idev->arp_parms); return err; } static void devinet_sysctl_unregister(struct in_device *idev) { struct net *net = dev_net(idev->dev); __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex); neigh_sysctl_unregister(idev->arp_parms); } static struct ctl_table ctl_forward_entry[] = { { .procname = "ip_forward", .data = &ipv4_devconf.data[ IPV4_DEVCONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, .extra1 = &ipv4_devconf, .extra2 = &init_net, }, }; #endif static __net_init int devinet_init_net(struct net *net) { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table *tbl; #endif struct ipv4_devconf *all, *dflt; int err; int i; err = -ENOMEM; net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!net->ipv4.inet_addr_lst) goto err_alloc_hash; all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); if (!dflt) goto err_alloc_dflt; #ifdef CONFIG_SYSCTL tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL); if (!tbl) goto err_alloc_ctl; tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; tbl[0].extra1 = all; tbl[0].extra2 = net; #endif if (!net_eq(net, &init_net)) { switch (net_inherit_devconf()) { case 3: /* copy from the current netns */ memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, current->nsproxy->net_ns->ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); break; case 0: case 1: /* copy from init_net */ memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); break; case 2: /* use compiled values */ break; } } #ifdef CONFIG_SYSCTL err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all); if (err < 0) goto err_reg_all; err = __devinet_sysctl_register(net, "default", NETCONFA_IFINDEX_DEFAULT, dflt); if (err < 0) goto err_reg_dflt; err = -ENOMEM; forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl, ARRAY_SIZE(ctl_forward_entry)); if (!forw_hdr) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; #endif for (i = 0; i < IN4_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]); INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime); net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; #ifdef CONFIG_SYSCTL err_reg_ctl: __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); err_reg_dflt: __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(tbl); err_alloc_ctl: #endif kfree(dflt); err_alloc_dflt: kfree(all); err_alloc_all: kfree(net->ipv4.inet_addr_lst); err_alloc_hash: return err; } static __net_exit void devinet_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL const struct ctl_table *tbl; #endif cancel_delayed_work_sync(&net->ipv4.addr_chk_work); #ifdef CONFIG_SYSCTL tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); __devinet_sysctl_unregister(net, net->ipv4.devconf_all, NETCONFA_IFINDEX_ALL); kfree(tbl); #endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); kfree(net->ipv4.inet_addr_lst); } static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, .validate_link_af = inet_validate_link_af, .set_link_af = inet_set_link_af, }; static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr, .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST, .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); if (rtnl_af_register(&inet_af_ops)) panic("Unable to register inet_af_ops\n"); rtnl_register_many(devinet_rtnl_msg_handlers); } |
| 9 9 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | // SPDX-License-Identifier: GPL-2.0-only /* * CAIF Framing Layer. * * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/crc-ccitt.h> #include <linux/netdevice.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cffrml.h> #define container_obj(layr) container_of(layr, struct cffrml, layer) struct cffrml { struct cflayer layer; bool dofcs; /* !< FCS active */ int __percpu *pcpu_refcnt; }; static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid); static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; struct cflayer *cffrml_create(u16 phyid, bool use_fcs) { struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC); if (!this) return NULL; this->pcpu_refcnt = alloc_percpu(int); if (this->pcpu_refcnt == NULL) { kfree(this); return NULL; } caif_assert(offsetof(struct cffrml, layer) == 0); this->layer.receive = cffrml_receive; this->layer.transmit = cffrml_transmit; this->layer.ctrlcmd = cffrml_ctrlcmd; snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid); this->dofcs = use_fcs; this->layer.id = phyid; return (struct cflayer *) this; } void cffrml_free(struct cflayer *layer) { struct cffrml *this = container_obj(layer); free_percpu(this->pcpu_refcnt); kfree(layer); } void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up) { this->up = up; } void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn) { this->dn = dn; } static u16 cffrml_checksum(u16 chks, void *buf, u16 len) { /* FIXME: FCS should be moved to glue in order to use OS-Specific * solutions */ return crc_ccitt(chks, buf, len); } static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) { u16 tmp; u16 len; u16 hdrchks; int pktchks; struct cffrml *this; this = container_obj(layr); cfpkt_extr_head(pkt, &tmp, 2); len = le16_to_cpu(tmp); /* Subtract for FCS on length if FCS is not used. */ if (!this->dofcs) len -= 2; if (cfpkt_setlen(pkt, len) < 0) { ++cffrml_rcv_error; pr_err("Framing length error (%d)\n", len); cfpkt_destroy(pkt); return -EPROTO; } /* * Don't do extract if FCS is false, rather do setlen - then we don't * get a cache-miss. */ if (this->dofcs) { cfpkt_extr_trail(pkt, &tmp, 2); hdrchks = le16_to_cpu(tmp); pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); if (pktchks != hdrchks) { cfpkt_add_trail(pkt, &tmp, 2); ++cffrml_rcv_error; ++cffrml_rcv_checsum_error; pr_info("Frame checksum error (0x%x != 0x%x)\n", hdrchks, pktchks); return -EILSEQ; } } if (cfpkt_erroneous(pkt)) { ++cffrml_rcv_error; pr_err("Packet is erroneous!\n"); cfpkt_destroy(pkt); return -EPROTO; } if (layr->up == NULL) { pr_err("Layr up is missing!\n"); cfpkt_destroy(pkt); return -EINVAL; } return layr->up->receive(layr->up, pkt); } static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) { u16 chks; u16 len; __le16 data; struct cffrml *this = container_obj(layr); if (this->dofcs) { chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); data = cpu_to_le16(chks); cfpkt_add_trail(pkt, &data, 2); } else { cfpkt_pad_trail(pkt, 2); } len = cfpkt_getlen(pkt); data = cpu_to_le16(len); cfpkt_add_head(pkt, &data, 2); cfpkt_info(pkt)->hdr_len += 2; if (cfpkt_erroneous(pkt)) { pr_err("Packet is erroneous!\n"); cfpkt_destroy(pkt); return -EPROTO; } if (layr->dn == NULL) { cfpkt_destroy(pkt); return -ENODEV; } return layr->dn->transmit(layr->dn, pkt); } static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); } void cffrml_put(struct cflayer *layr) { struct cffrml *this = container_obj(layr); if (layr != NULL && this->pcpu_refcnt != NULL) this_cpu_dec(*this->pcpu_refcnt); } void cffrml_hold(struct cflayer *layr) { struct cffrml *this = container_obj(layr); if (layr != NULL && this->pcpu_refcnt != NULL) this_cpu_inc(*this->pcpu_refcnt); } int cffrml_refcnt_read(struct cflayer *layr) { int i, refcnt = 0; struct cffrml *this = container_obj(layr); for_each_possible_cpu(i) refcnt += *per_cpu_ptr(this->pcpu_refcnt, i); return refcnt; } |
| 14 6 8 9 2 2 2 5 5 2 1 1 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 | // SPDX-License-Identifier: GPL-2.0-only /* * Input device TTY line discipline * * Copyright (c) 1999-2002 Vojtech Pavlik * * This is a module that converts a tty line into a much simpler * 'serial io port' abstraction that the input device drivers use. */ #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/serio.h> #include <linux/tty.h> #include <linux/compat.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Input device TTY line discipline"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_MOUSE); #define SERPORT_BUSY 1 #define SERPORT_ACTIVE 2 #define SERPORT_DEAD 3 struct serport { struct tty_struct *tty; wait_queue_head_t wait; struct serio *serio; struct serio_device_id id; spinlock_t lock; unsigned long flags; }; /* * Callback functions from the serio code. */ static int serport_serio_write(struct serio *serio, unsigned char data) { struct serport *serport = serio->port_data; return -(serport->tty->ops->write(serport->tty, &data, 1) != 1); } static int serport_serio_open(struct serio *serio) { struct serport *serport = serio->port_data; guard(spinlock_irqsave)(&serport->lock); set_bit(SERPORT_ACTIVE, &serport->flags); return 0; } static void serport_serio_close(struct serio *serio) { struct serport *serport = serio->port_data; guard(spinlock_irqsave)(&serport->lock); clear_bit(SERPORT_ACTIVE, &serport->flags); } /* * serport_ldisc_open() is the routine that is called upon setting our line * discipline on a tty. It prepares the serio struct. */ static int serport_ldisc_open(struct tty_struct *tty) { struct serport *serport; if (!capable(CAP_SYS_ADMIN)) return -EPERM; serport = kzalloc(sizeof(*serport), GFP_KERNEL); if (!serport) return -ENOMEM; serport->tty = tty; spin_lock_init(&serport->lock); init_waitqueue_head(&serport->wait); tty->disc_data = serport; tty->receive_room = 256; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); return 0; } /* * serport_ldisc_close() is the opposite of serport_ldisc_open() */ static void serport_ldisc_close(struct tty_struct *tty) { struct serport *serport = tty->disc_data; kfree(serport); } /* * serport_ldisc_receive() is called by the low level tty driver when characters * are ready for us. We forward the characters and flags, one by one to the * 'interrupt' routine. */ static void serport_ldisc_receive(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct serport *serport = tty->disc_data; unsigned int ch_flags = 0; int i; guard(spinlock_irqsave)(&serport->lock); if (!test_bit(SERPORT_ACTIVE, &serport->flags)) return; for (i = 0; i < count; i++) { if (fp) { switch (fp[i]) { case TTY_FRAME: ch_flags = SERIO_FRAME; break; case TTY_PARITY: ch_flags = SERIO_PARITY; break; default: ch_flags = 0; break; } } serio_interrupt(serport->serio, cp[i], ch_flags); } } /* * serport_ldisc_read() just waits indefinitely if everything goes well. * However, when the serio driver closes the serio port, it finishes, * returning 0 characters. */ static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, u8 *kbuf, size_t nr, void **cookie, unsigned long offset) { struct serport *serport = tty->disc_data; struct serio *serio; if (test_and_set_bit(SERPORT_BUSY, &serport->flags)) return -EBUSY; serport->serio = serio = kzalloc(sizeof(*serio), GFP_KERNEL); if (!serio) return -ENOMEM; strscpy(serio->name, "Serial port", sizeof(serio->name)); snprintf(serio->phys, sizeof(serio->phys), "%s/serio0", tty_name(tty)); serio->id = serport->id; serio->id.type = SERIO_RS232; serio->write = serport_serio_write; serio->open = serport_serio_open; serio->close = serport_serio_close; serio->port_data = serport; serio->dev.parent = tty->dev; serio_register_port(serport->serio); printk(KERN_INFO "serio: Serial port %s\n", tty_name(tty)); wait_event_interruptible(serport->wait, test_bit(SERPORT_DEAD, &serport->flags)); serio_unregister_port(serport->serio); serport->serio = NULL; clear_bit(SERPORT_DEAD, &serport->flags); clear_bit(SERPORT_BUSY, &serport->flags); return 0; } static void serport_set_type(struct tty_struct *tty, unsigned long type) { struct serport *serport = tty->disc_data; serport->id.proto = type & 0x000000ff; serport->id.id = (type & 0x0000ff00) >> 8; serport->id.extra = (type & 0x00ff0000) >> 16; } /* * serport_ldisc_ioctl() allows to set the port protocol, and device ID */ static int serport_ldisc_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { if (cmd == SPIOCSTYPE) { unsigned long type; if (get_user(type, (unsigned long __user *) arg)) return -EFAULT; serport_set_type(tty, type); return 0; } return -EINVAL; } #ifdef CONFIG_COMPAT #define COMPAT_SPIOCSTYPE _IOW('q', 0x01, compat_ulong_t) static int serport_ldisc_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { if (cmd == COMPAT_SPIOCSTYPE) { void __user *uarg = compat_ptr(arg); compat_ulong_t compat_type; if (get_user(compat_type, (compat_ulong_t __user *)uarg)) return -EFAULT; serport_set_type(tty, compat_type); return 0; } return -EINVAL; } #endif static void serport_ldisc_hangup(struct tty_struct *tty) { struct serport *serport = tty->disc_data; scoped_guard(spinlock_irqsave, &serport->lock) set_bit(SERPORT_DEAD, &serport->flags); wake_up_interruptible(&serport->wait); } static void serport_ldisc_write_wakeup(struct tty_struct * tty) { struct serport *serport = tty->disc_data; guard(spinlock_irqsave)(&serport->lock); if (test_bit(SERPORT_ACTIVE, &serport->flags)) serio_drv_write_wakeup(serport->serio); } /* * The line discipline structure. */ static struct tty_ldisc_ops serport_ldisc = { .owner = THIS_MODULE, .num = N_MOUSE, .name = "input", .open = serport_ldisc_open, .close = serport_ldisc_close, .read = serport_ldisc_read, .ioctl = serport_ldisc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = serport_ldisc_compat_ioctl, #endif .receive_buf = serport_ldisc_receive, .hangup = serport_ldisc_hangup, .write_wakeup = serport_ldisc_write_wakeup }; /* * The functions for insering/removing us as a module. */ static int __init serport_init(void) { int retval; retval = tty_register_ldisc(&serport_ldisc); if (retval) printk(KERN_ERR "serport.c: Error registering line discipline.\n"); return retval; } static void __exit serport_exit(void) { tty_unregister_ldisc(&serport_ldisc); } module_init(serport_init); module_exit(serport_exit); |
| 45 252 1 137 1 2 5 60 381 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_MISC_H #define BTRFS_MISC_H #include <linux/types.h> #include <linux/bitmap.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/math64.h> #include <linux/rbtree.h> #include <linux/bio.h> /* * Enumerate bits using enum autoincrement. Define the @name as the n-th bit. */ #define ENUM_BIT(name) \ __ ## name ## _BIT, \ name = (1U << __ ## name ## _BIT), \ __ ## name ## _SEQ = __ ## name ## _BIT static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter) { struct bio_vec bv = bio_iter_iovec(bio, *iter); return bvec_phys(&bv); } /* * Iterate bio using btrfs block size. * * This will handle large folio and highmem. * * @paddr: Physical memory address of each iteration * @bio: The bio to iterate * @iter: The bvec_iter (pointer) to use. * @blocksize: The blocksize to iterate. * * This requires all folios in the bio to cover at least one block. */ #define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \ for (; (iter)->bi_size && \ (paddr = bio_iter_phys((bio), (iter)), 1); \ bio_advance_iter_single((bio), (iter), (blocksize))) /* Initialize a bvec_iter to the size of the specified bio. */ static inline struct bvec_iter init_bvec_iter_for_bio(struct bio *bio) { struct bio_vec *bvec; u32 bio_size = 0; int i; bio_for_each_bvec_all(bvec, bio, i) bio_size += bvec->bv_len; return (struct bvec_iter) { .bi_sector = 0, .bi_size = bio_size, .bi_idx = 0, .bi_bvec_done = 0, }; } #define btrfs_bio_for_each_block_all(paddr, bio, blocksize) \ for (struct bvec_iter iter = init_bvec_iter_for_bio(bio); \ (iter).bi_size && \ (paddr = bio_iter_phys((bio), &(iter)), 1); \ bio_advance_iter_single((bio), &(iter), (blocksize))) static inline void cond_wake_up(struct wait_queue_head *wq) { /* * This implies a full smp_mb barrier, see comments for * waitqueue_active why. */ if (wq_has_sleeper(wq)) wake_up(wq); } static inline void cond_wake_up_nomb(struct wait_queue_head *wq) { /* * Special case for conditional wakeup where the barrier required for * waitqueue_active is implied by some of the preceding code. Eg. one * of such atomic operations (atomic_dec_and_return, ...), or a * unlock/lock sequence, etc. */ if (waitqueue_active(wq)) wake_up(wq); } static inline u64 mult_perc(u64 num, u32 percent) { return div_u64(num * percent, 100); } /* Copy of is_power_of_two that is 64bit safe */ static inline bool is_power_of_two_u64(u64 n) { return n != 0 && (n & (n - 1)) == 0; } static inline bool has_single_bit_set(u64 n) { return is_power_of_two_u64(n); } /* * Simple bytenr based rb_tree relate structures * * Any structure wants to use bytenr as single search index should have their * structure start with these members. */ struct rb_simple_node { struct rb_node rb_node; u64 bytenr; }; static inline struct rb_node *rb_simple_search(const struct rb_root *root, u64 bytenr) { struct rb_node *node = root->rb_node; struct rb_simple_node *entry; while (node) { entry = rb_entry(node, struct rb_simple_node, rb_node); if (bytenr < entry->bytenr) node = node->rb_left; else if (bytenr > entry->bytenr) node = node->rb_right; else return node; } return NULL; } /* * Search @root from an entry that starts or comes after @bytenr. * * @root: the root to search. * @bytenr: bytenr to search from. * * Return the rb_node that start at or after @bytenr. If there is no entry at * or after @bytner return NULL. */ static inline struct rb_node *rb_simple_search_first(const struct rb_root *root, u64 bytenr) { struct rb_node *node = root->rb_node, *ret = NULL; struct rb_simple_node *entry, *ret_entry = NULL; while (node) { entry = rb_entry(node, struct rb_simple_node, rb_node); if (bytenr < entry->bytenr) { if (!ret || entry->bytenr < ret_entry->bytenr) { ret = node; ret_entry = entry; } node = node->rb_left; } else if (bytenr > entry->bytenr) { node = node->rb_right; } else { return node; } } return ret; } static int rb_simple_node_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) { struct rb_simple_node *new_entry = rb_entry(new, struct rb_simple_node, rb_node); struct rb_simple_node *existing_entry = rb_entry(existing, struct rb_simple_node, rb_node); if (new_entry->bytenr < existing_entry->bytenr) return -1; else if (new_entry->bytenr > existing_entry->bytenr) return 1; return 0; } static inline struct rb_node *rb_simple_insert(struct rb_root *root, struct rb_simple_node *simple_node) { return rb_find_add(&simple_node->rb_node, root, rb_simple_node_bytenr_cmp); } static inline bool bitmap_test_range_all_set(const unsigned long *addr, unsigned long start, unsigned long nbits) { unsigned long found_zero; found_zero = find_next_zero_bit(addr, start + nbits, start); return (found_zero == start + nbits); } static inline bool bitmap_test_range_all_zero(const unsigned long *addr, unsigned long start, unsigned long nbits) { unsigned long found_set; found_set = find_next_bit(addr, start + nbits, start); return (found_set == start + nbits); } static inline u64 folio_end(struct folio *folio) { return folio_pos(folio) + folio_size(folio); } #endif |
| 92 92 604 515 92 556 553 22 23 4 4 5 12 3 10 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | // SPDX-License-Identifier: GPL-2.0 /* * linux/ipc/namespace.c * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc. */ #include <linux/ipc.h> #include <linux/msg.h> #include <linux/ipc_namespace.h> #include <linux/rcupdate.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> #include <linux/nstree.h> #include <linux/sched/task.h> #include "util.h" /* * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount. */ static void free_ipc(struct work_struct *unused); static DECLARE_WORK(free_ipc_work, free_ipc); static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); } static void dec_ipc_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES); } static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, struct ipc_namespace *old_ns) { struct ipc_namespace *ns; struct ucounts *ucounts; int err; err = -ENOSPC; again: ucounts = inc_ipc_namespaces(user_ns); if (!ucounts) { /* * IPC namespaces are freed asynchronously, by free_ipc_work. * If frees were pending, flush_work will wait, and * return true. Fail the allocation if no frees are pending. */ if (flush_work(&free_ipc_work)) goto again; goto fail; } err = -ENOMEM; ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT); if (ns == NULL) goto fail_dec; err = ns_common_init(ns); if (err) goto fail_free; ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; err = mq_init_ns(ns); if (err) goto fail_put; err = -ENOMEM; if (!setup_mq_sysctls(ns)) goto fail_put; if (!setup_ipc_sysctls(ns)) goto fail_mq; err = msg_init_ns(ns); if (err) goto fail_ipc; sem_init_ns(ns); shm_init_ns(ns); ns_tree_add(ns); return ns; fail_ipc: retire_ipc_sysctls(ns); fail_mq: retire_mq_sysctls(ns); fail_put: put_user_ns(ns->user_ns); ns_common_free(ns); fail_free: kfree(ns); fail_dec: dec_ipc_namespaces(ucounts); fail: return ERR_PTR(err); } struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (!(flags & CLONE_NEWIPC)) return get_ipc_ns(ns); return create_ipc_ns(user_ns, ns); } /* * free_ipcs - free all ipcs of one type * @ns: the namespace to remove the ipcs from * @ids: the table of ipcs to free * @free: the function called to free each individual ipc * * Called for each kind of ipc when an ipc_namespace exits. */ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, void (*free)(struct ipc_namespace *, struct kern_ipc_perm *)) { struct kern_ipc_perm *perm; int next_id; int total, in_use; down_write(&ids->rwsem); in_use = ids->in_use; for (total = 0, next_id = 0; total < in_use; next_id++) { perm = idr_find(&ids->ipcs_idr, next_id); if (perm == NULL) continue; rcu_read_lock(); ipc_lock_object(perm); free(ns, perm); total++; } up_write(&ids->rwsem); } static void free_ipc_ns(struct ipc_namespace *ns) { /* * Caller needs to wait for an RCU grace period to have passed * after making the mount point inaccessible to new accesses. */ mntput(ns->mq_mnt); sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); retire_mq_sysctls(ns); retire_ipc_sysctls(ns); dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_common_free(ns); kfree(ns); } static LLIST_HEAD(free_ipc_list); static void free_ipc(struct work_struct *unused) { struct llist_node *node = llist_del_all(&free_ipc_list); struct ipc_namespace *n, *t; llist_for_each_entry_safe(n, t, node, mnt_llist) mnt_make_shortterm(n->mq_mnt); /* Wait for any last users to have gone away. */ synchronize_rcu(); llist_for_each_entry_safe(n, t, node, mnt_llist) free_ipc_ns(n); } /* * put_ipc_ns - drop a reference to an ipc namespace. * @ns: the namespace to put * * If this is the last task in the namespace exiting, and * it is dropping the refcount to 0, then it can race with * a task in another ipc namespace but in a mounts namespace * which has this ipcns's mqueuefs mounted, doing some action * with one of the mqueuefs files. That can raise the refcount. * So dropping the refcount, and raising the refcount when * accessing it through the VFS, are protected with mq_lock. * * (Clearly, a task raising the refcount on its own ipc_ns * needn't take mq_lock since it can't race with the last task * in the ipcns exiting). */ void put_ipc_ns(struct ipc_namespace *ns) { if (ns_ref_put_and_lock(ns, &mq_lock)) { mq_clear_sbinfo(ns); spin_unlock(&mq_lock); ns_tree_remove(ns); if (llist_add(&ns->mnt_llist, &free_ipc_list)) schedule_work(&free_ipc_work); } } static struct ns_common *ipcns_get(struct task_struct *task) { struct ipc_namespace *ns = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) ns = get_ipc_ns(nsproxy->ipc_ns); task_unlock(task); return ns ? &ns->ns : NULL; } static void ipcns_put(struct ns_common *ns) { return put_ipc_ns(to_ipc_ns(ns)); } static int ipcns_install(struct nsset *nsset, struct ns_common *new) { struct nsproxy *nsproxy = nsset->nsproxy; struct ipc_namespace *ns = to_ipc_ns(new); if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) return -EPERM; put_ipc_ns(nsproxy->ipc_ns); nsproxy->ipc_ns = get_ipc_ns(ns); return 0; } static struct user_namespace *ipcns_owner(struct ns_common *ns) { return to_ipc_ns(ns)->user_ns; } const struct proc_ns_operations ipcns_operations = { .name = "ipc", .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, .owner = ipcns_owner, }; |
| 33 25 8 31 428 426 298 429 90 81 9 2 2 4 1 2 3 4 5 3 6 5 4 8 1 9 9 60 1 57 4 61 61 67 186 225 13 220 176 59 21 80 226 210 9 61 226 67 11 34 34 34 34 34 34 34 34 34 34 11 11 11 11 11 39 39 39 16 29 34 34 34 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ #include <linux/iversion.h> #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_inode_util.h" #include "xfs_trans.h" #include "xfs_ialloc.h" #include "xfs_health.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_ag.h" #include "xfs_iunlink_item.h" #include "xfs_inode_item.h" uint16_t xfs_flags2diflags( struct xfs_inode *ip, unsigned int xflags) { /* can't set PREALLOC this way, just preserve it */ uint16_t di_flags = (ip->i_diflags & XFS_DIFLAG_PREALLOC); if (xflags & FS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; if (xflags & FS_XFLAG_APPEND) di_flags |= XFS_DIFLAG_APPEND; if (xflags & FS_XFLAG_SYNC) di_flags |= XFS_DIFLAG_SYNC; if (xflags & FS_XFLAG_NOATIME) di_flags |= XFS_DIFLAG_NOATIME; if (xflags & FS_XFLAG_NODUMP) di_flags |= XFS_DIFLAG_NODUMP; if (xflags & FS_XFLAG_NODEFRAG) di_flags |= XFS_DIFLAG_NODEFRAG; if (xflags & FS_XFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; if (S_ISDIR(VFS_I(ip)->i_mode)) { if (xflags & FS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; if (xflags & FS_XFLAG_NOSYMLINKS) di_flags |= XFS_DIFLAG_NOSYMLINKS; if (xflags & FS_XFLAG_EXTSZINHERIT) di_flags |= XFS_DIFLAG_EXTSZINHERIT; if (xflags & FS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(VFS_I(ip)->i_mode)) { if (xflags & FS_XFLAG_REALTIME) di_flags |= XFS_DIFLAG_REALTIME; if (xflags & FS_XFLAG_EXTSIZE) di_flags |= XFS_DIFLAG_EXTSIZE; } return di_flags; } uint64_t xfs_flags2diflags2( struct xfs_inode *ip, unsigned int xflags) { uint64_t di_flags2 = (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; if (xflags & FS_XFLAG_COWEXTSIZE) di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; return di_flags2; } uint32_t xfs_ip2xflags( struct xfs_inode *ip) { uint32_t flags = 0; if (ip->i_diflags & XFS_DIFLAG_ANY) { if (ip->i_diflags & XFS_DIFLAG_REALTIME) flags |= FS_XFLAG_REALTIME; if (ip->i_diflags & XFS_DIFLAG_PREALLOC) flags |= FS_XFLAG_PREALLOC; if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) flags |= FS_XFLAG_IMMUTABLE; if (ip->i_diflags & XFS_DIFLAG_APPEND) flags |= FS_XFLAG_APPEND; if (ip->i_diflags & XFS_DIFLAG_SYNC) flags |= FS_XFLAG_SYNC; if (ip->i_diflags & XFS_DIFLAG_NOATIME) flags |= FS_XFLAG_NOATIME; if (ip->i_diflags & XFS_DIFLAG_NODUMP) flags |= FS_XFLAG_NODUMP; if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) flags |= FS_XFLAG_RTINHERIT; if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) flags |= FS_XFLAG_PROJINHERIT; if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) flags |= FS_XFLAG_NOSYMLINKS; if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) flags |= FS_XFLAG_EXTSIZE; if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) flags |= FS_XFLAG_EXTSZINHERIT; if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) flags |= FS_XFLAG_NODEFRAG; if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) flags |= FS_XFLAG_FILESTREAM; } if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { if (ip->i_diflags2 & XFS_DIFLAG2_DAX) flags |= FS_XFLAG_DAX; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) flags |= FS_XFLAG_COWEXTSIZE; } if (xfs_inode_has_attr_fork(ip)) flags |= FS_XFLAG_HASATTR; return flags; } prid_t xfs_get_initial_prid(struct xfs_inode *dp) { if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) return dp->i_projid; /* Assign to the root project by default. */ return 0; } /* Propagate di_flags from a parent inode to a child inode. */ static inline void xfs_inode_inherit_flags( struct xfs_inode *ip, const struct xfs_inode *pip) { unsigned int di_flags = 0; xfs_failaddr_t failaddr; umode_t mode = VFS_I(ip)->i_mode; if (S_ISDIR(mode)) { if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSZINHERIT; ip->i_extsize = pip->i_extsize; } if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(mode)) { if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && xfs_has_realtime(ip->i_mount)) di_flags |= XFS_DIFLAG_REALTIME; if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSIZE; ip->i_extsize = pip->i_extsize; } } if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && xfs_inherit_noatime) di_flags |= XFS_DIFLAG_NOATIME; if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && xfs_inherit_nodump) di_flags |= XFS_DIFLAG_NODUMP; if ((pip->i_diflags & XFS_DIFLAG_SYNC) && xfs_inherit_sync) di_flags |= XFS_DIFLAG_SYNC; if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && xfs_inherit_nosymlinks) di_flags |= XFS_DIFLAG_NOSYMLINKS; if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && xfs_inherit_nodefrag) di_flags |= XFS_DIFLAG_NODEFRAG; if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; ip->i_diflags |= di_flags; /* * Inode verifiers on older kernels only check that the extent size * hint is an integer multiple of the rt extent size on realtime files. * They did not check the hint alignment on a directory with both * rtinherit and extszinherit flags set. If the misaligned hint is * propagated from a directory into a new realtime file, new file * allocations will fail due to math errors in the rt allocator and/or * trip the verifiers. Validate the hint settings in the new file so * that we don't let broken hints propagate. */ failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, VFS_I(ip)->i_mode, ip->i_diflags); if (failaddr) { ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT); ip->i_extsize = 0; } } /* Propagate di_flags2 from a parent inode to a child inode. */ static inline void xfs_inode_inherit_flags2( struct xfs_inode *ip, const struct xfs_inode *pip) { xfs_failaddr_t failaddr; if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_cowextsize = pip->i_cowextsize; } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; if (xfs_is_metadir_inode(pip)) ip->i_diflags2 |= XFS_DIFLAG2_METADATA; /* Don't let invalid cowextsize hints propagate. */ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); if (failaddr) { ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; ip->i_cowextsize = 0; } } /* * If we need to create attributes immediately after allocating the inode, * initialise an empty attribute fork right now. We use the default fork offset * for attributes here as we don't know exactly what size or how many * attributes we might be adding. We can do this safely here because we know * the data fork is completely empty and this saves us from needing to run a * separate transaction to set the fork offset in the immediate future. * * If we have parent pointers and the caller hasn't told us that the file will * never be linked into a directory tree, we /must/ create the attr fork. */ static inline bool xfs_icreate_want_attrfork( struct xfs_mount *mp, const struct xfs_icreate_args *args) { if (args->flags & XFS_ICREATE_INIT_XATTRS) return true; if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp)) return true; return false; } /* Initialise an inode's attributes. */ void xfs_inode_init( struct xfs_trans *tp, const struct xfs_icreate_args *args, struct xfs_inode *ip) { struct xfs_inode *pip = args->pip; struct inode *dir = pip ? VFS_I(pip) : NULL; struct xfs_mount *mp = tp->t_mountp; struct inode *inode = VFS_I(ip); unsigned int flags; int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG | XFS_ICHGTIME_ACCESS; if (args->flags & XFS_ICREATE_TMPFILE) set_nlink(inode, 0); else if (S_ISDIR(args->mode)) set_nlink(inode, 2); else set_nlink(inode, 1); inode->i_rdev = args->rdev; if (!args->idmap || pip == NULL) { /* creating a tree root, sb rooted, or detached file */ inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; ip->i_projid = 0; inode->i_mode = args->mode; } else { /* creating a child in the directory tree */ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { inode_fsuid_set(inode, args->idmap); inode->i_gid = dir->i_gid; inode->i_mode = args->mode; } else { inode_init_owner(args->idmap, inode, dir, args->mode); } ip->i_projid = xfs_get_initial_prid(pip); } ip->i_disk_size = 0; ip->i_df.if_nextents = 0; ASSERT(ip->i_nblocks == 0); ip->i_extsize = 0; ip->i_diflags = 0; if (xfs_has_v3inodes(mp)) { inode_set_iversion(inode, 1); /* also covers the di_used_blocks union arm: */ ip->i_cowextsize = 0; times |= XFS_ICHGTIME_CREATE; } xfs_trans_ichgtime(tp, ip, times); flags = XFS_ILOG_CORE; switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: ip->i_df.if_format = XFS_DINODE_FMT_DEV; flags |= XFS_ILOG_DEV; break; case S_IFREG: case S_IFDIR: if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) xfs_inode_inherit_flags(ip, pip); if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) xfs_inode_inherit_flags2(ip, pip); fallthrough; case S_IFLNK: ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; ip->i_df.if_bytes = 0; ip->i_df.if_data = NULL; break; default: ASSERT(0); } if (xfs_icreate_want_attrfork(mp, args)) { ip->i_forkoff = xfs_default_attroffset(ip) >> 3; xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); if (!xfs_has_attr(mp)) { spin_lock(&mp->m_sb_lock); xfs_add_attr(mp); spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); } } xfs_trans_log_inode(tp, ip, flags); } /* * In-Core Unlinked List Lookups * ============================= * * Every inode is supposed to be reachable from some other piece of metadata * with the exception of the root directory. Inodes with a connection to a * file descriptor but not linked from anywhere in the on-disk directory tree * are collectively known as unlinked inodes, though the filesystem itself * maintains links to these inodes so that on-disk metadata are consistent. * * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI * header contains a number of buckets that point to an inode, and each inode * record has a pointer to the next inode in the hash chain. This * singly-linked list causes scaling problems in the iunlink remove function * because we must walk that list to find the inode that points to the inode * being removed from the unlinked hash bucket list. * * Hence we keep an in-memory double linked list to link each inode on an * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer * based lists would require having 64 list heads in the perag, one for each * list. This is expensive in terms of memory (think millions of AGs) and cache * misses on lookups. Instead, use the fact that inodes on the unlinked list * must be referenced at the VFS level to keep them on the list and hence we * have an existence guarantee for inodes on the unlinked list. * * Given we have an existence guarantee, we can use lockless inode cache lookups * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode * for the double linked unlinked list, and we don't need any extra locking to * keep the list safe as all manipulations are done under the AGI buffer lock. * Keeping the list up to date does not require memory allocation, just finding * the XFS inode and updating the next/prev unlinked list aginos. */ /* * Update the prev pointer of the next agino. Returns -ENOLINK if the inode * is not in cache. */ static int xfs_iunlink_update_backref( struct xfs_perag *pag, xfs_agino_t prev_agino, xfs_agino_t next_agino) { struct xfs_inode *ip; /* No update necessary if we are at the end of the list. */ if (next_agino == NULLAGINO) return 0; ip = xfs_iunlink_lookup(pag, next_agino); if (!ip) return -ENOLINK; ip->i_prev_unlinked = prev_agino; return 0; } /* * Point the AGI unlinked bucket at an inode and log the results. The caller * is responsible for validating the old value. */ STATIC int xfs_iunlink_update_bucket( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_buf *agibp, unsigned int bucket_index, xfs_agino_t new_agino) { struct xfs_agi *agi = agibp->b_addr; xfs_agino_t old_value; int offset; ASSERT(xfs_verify_agino_or_null(pag, new_agino)); old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value, new_agino); /* * We should never find the head of the list already set to the value * passed in because either we're adding or removing ourselves from the * head of the list. */ if (old_value == new_agino) { xfs_buf_mark_corrupt(agibp); xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); return -EFSCORRUPTED; } agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); offset = offsetof(struct xfs_agi, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); return 0; } static int xfs_iunlink_insert_inode( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agibp->b_addr; xfs_agino_t next_agino; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the pointer isn't garbage and that this inode * isn't already on the list. */ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); if (next_agino == agino || !xfs_verify_agino_or_null(pag, next_agino)) { xfs_buf_mark_corrupt(agibp); xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); return -EFSCORRUPTED; } /* * Update the prev pointer in the next inode to point back to this * inode. */ error = xfs_iunlink_update_backref(pag, agino, next_agino); if (error == -ENOLINK) error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); if (error) return error; if (next_agino != NULLAGINO) { /* * There is already another inode in the bucket, so point this * inode to the current head of the list. */ error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); if (error) return error; ip->i_next_unlinked = next_agino; } /* Point the head of the list to point to this inode. */ ip->i_prev_unlinked = NULLAGINO; return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); } /* * This is called when the inode's link count has gone to 0 or we are creating * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. * * We place the on-disk inode on a list in the AGI. It will be pulled from this * list when the inode is freed. */ int xfs_iunlink( struct xfs_trans *tp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; struct xfs_perag *pag; struct xfs_buf *agibp; int error; ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); trace_xfs_iunlink(ip); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); /* Get the agi buffer first. It ensures lock ordering on the list. */ error = xfs_read_agi(pag, tp, 0, &agibp); if (error) goto out; error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); out: xfs_perag_put(pag); return error; } static int xfs_iunlink_remove_inode( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agibp->b_addr; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t head_agino; short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; trace_xfs_iunlink_remove(ip); /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the head pointer isn't garbage. */ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); if (!xfs_verify_agino(pag, head_agino)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi, sizeof(*agi)); xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); return -EFSCORRUPTED; } /* * Set our inode's next_unlinked pointer to NULL and then return * the old pointer value so that we can update whatever was previous * to us in the list to point to whatever was next in the list. */ error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); if (error) return error; /* * Update the prev pointer in the next inode to point back to previous * inode in the chain. */ error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, ip->i_next_unlinked); if (error == -ENOLINK) error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, ip->i_next_unlinked); if (error) return error; if (head_agino != agino) { struct xfs_inode *prev_ip; prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); if (!prev_ip) { xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } error = xfs_iunlink_log_inode(tp, prev_ip, pag, ip->i_next_unlinked); prev_ip->i_next_unlinked = ip->i_next_unlinked; } else { /* Point the head of the list to the next unlinked inode. */ error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, ip->i_next_unlinked); } ip->i_next_unlinked = NULLAGINO; ip->i_prev_unlinked = 0; return error; } /* * Pull the on-disk inode from the AGI unlinked list. */ int xfs_iunlink_remove( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_inode *ip) { struct xfs_buf *agibp; int error; trace_xfs_iunlink_remove(ip); /* Get the agi buffer first. It ensures lock ordering on the list. */ error = xfs_read_agi(pag, tp, 0, &agibp); if (error) return error; return xfs_iunlink_remove_inode(tp, pag, agibp, ip); } /* * Decrement the link count on an inode & log the change. If this causes the * link count to go to zero, move the inode to AGI unlinked list so that it can * be freed when the last active reference goes away via xfs_inactive(). */ int xfs_droplink( struct xfs_trans *tp, struct xfs_inode *ip) { struct inode *inode = VFS_I(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); if (inode->i_nlink == 0) { xfs_info_ratelimited(tp->t_mountp, "Inode 0x%llx link count dropped below zero. Pinning link count.", ip->i_ino); set_nlink(inode, XFS_NLINK_PINNED); } if (inode->i_nlink != XFS_NLINK_PINNED) drop_nlink(inode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (inode->i_nlink) return 0; return xfs_iunlink(tp, ip); } /* * Increment the link count on an inode & log the change. */ void xfs_bumplink( struct xfs_trans *tp, struct xfs_inode *ip) { struct inode *inode = VFS_I(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); if (inode->i_nlink == XFS_NLINK_PINNED - 1) xfs_info_ratelimited(tp->t_mountp, "Inode 0x%llx link count exceeded maximum. Pinning link count.", ip->i_ino); if (inode->i_nlink != XFS_NLINK_PINNED) inc_nlink(inode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } /* Free an inode in the ondisk index and zero it out. */ int xfs_inode_uninit( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_inode *ip, struct xfs_icluster *xic) { struct xfs_mount *mp = ip->i_mount; int error; /* * Free the inode first so that we guarantee that the AGI lock is going * to be taken before we remove the inode from the unlinked list. This * makes the AGI lock -> unlinked list modification order the same as * used in O_TMPFILE creation. */ error = xfs_difree(tp, pag, ip->i_ino, xic); if (error) return error; error = xfs_iunlink_remove(tp, pag, ip); if (error) return error; /* * Free any local-format data sitting around before we reset the * data fork to extents format. Note that the attr fork data has * already been freed by xfs_attr_inactive. */ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { kfree(ip->i_df.if_data); ip->i_df.if_data = NULL; ip->i_df.if_bytes = 0; } VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_diflags = 0; ip->i_diflags2 = mp->m_ino_geo.new_diflags2; ip->i_forkoff = 0; /* mark the attr fork not in use */ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; /* * Bump the generation count so no one will be confused * by reincarnations of this inode. */ VFS_I(ip)->i_generation++; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return 0; } |
| 7416 7411 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> * * Automatically generate and verify integrity data on PI capable devices if the * bio submitter didn't provide PI itself. This ensures that kernel verifies * data integrity even if the file system (or other user of the block device) is * not aware of PI. */ #include <linux/blk-integrity.h> #include <linux/t10-pi.h> #include <linux/workqueue.h> #include "blk.h" struct bio_integrity_data { struct bio *bio; struct bvec_iter saved_bio_iter; struct work_struct work; struct bio_integrity_payload bip; struct bio_vec bvec; }; static struct kmem_cache *bid_slab; static mempool_t bid_pool; static struct workqueue_struct *kintegrityd_wq; static void bio_integrity_finish(struct bio_integrity_data *bid) { bid->bio->bi_integrity = NULL; bid->bio->bi_opf &= ~REQ_INTEGRITY; kfree(bvec_virt(bid->bip.bip_vec)); mempool_free(bid, &bid_pool); } static void bio_integrity_verify_fn(struct work_struct *work) { struct bio_integrity_data *bid = container_of(work, struct bio_integrity_data, work); struct bio *bio = bid->bio; blk_integrity_verify_iter(bio, &bid->saved_bio_iter); bio_integrity_finish(bid); bio_endio(bio); } #define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) static bool bip_should_check(struct bio_integrity_payload *bip) { return bip->bip_flags & BIP_CHECK_FLAGS; } static bool bi_offload_capable(struct blk_integrity *bi) { switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_CRC64: return bi->metadata_size == sizeof(struct crc64_pi_tuple); case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: return bi->metadata_size == sizeof(struct t10_pi_tuple); default: pr_warn_once("%s: unknown integrity checksum type:%d\n", __func__, bi->csum_type); fallthrough; case BLK_INTEGRITY_CSUM_NONE: return false; } } /** * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio * * Normally I/O completion is done in interrupt context. However, verifying I/O * integrity is a time-consuming task which must be run in process context. * * This function postpones completion accordingly. */ bool __bio_integrity_endio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_data *bid = container_of(bip, struct bio_integrity_data, bip); if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bip_should_check(bip)) { INIT_WORK(&bid->work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bid->work); return false; } bio_integrity_finish(bid); return true; } /** * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare * * Checks if the bio already has an integrity payload attached. If it does, the * payload has been generated by another kernel subsystem, and we just pass it * through. * Otherwise allocates integrity payload and for writes the integrity metadata * will be generated. For reads, the completion handler will verify the * metadata. */ bool bio_integrity_prep(struct bio *bio) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_data *bid; bool set_flags = true; gfp_t gfp = GFP_NOIO; unsigned int len; void *buf; if (!bi) return true; if (!bio_sectors(bio)) return true; /* Already protected? */ if (bio_integrity(bio)) return true; switch (bio_op(bio)) { case REQ_OP_READ: if (bi->flags & BLK_INTEGRITY_NOVERIFY) { if (bi_offload_capable(bi)) return true; set_flags = false; } break; case REQ_OP_WRITE: /* * Zero the memory allocated to not leak uninitialized kernel * memory to disk for non-integrity metadata where nothing else * initializes the memory. */ if (bi->flags & BLK_INTEGRITY_NOGENERATE) { if (bi_offload_capable(bi)) return true; set_flags = false; gfp |= __GFP_ZERO; } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) gfp |= __GFP_ZERO; break; default: return true; } if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) return true; /* Allocate kernel buffer for protection data */ len = bio_integrity_bytes(bi, bio_sectors(bio)); buf = kmalloc(len, gfp); if (!buf) goto err_end_io; bid = mempool_alloc(&bid_pool, GFP_NOIO); if (!bid) goto err_free_buf; bio_integrity_init(bio, &bid->bip, &bid->bvec, 1); bid->bio = bio; bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); if (set_flags) { if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) bid->bip.bip_flags |= BIP_IP_CHECKSUM; if (bi->csum_type) bid->bip.bip_flags |= BIP_CHECK_GUARD; if (bi->flags & BLK_INTEGRITY_REF_TAG) bid->bip.bip_flags |= BIP_CHECK_REFTAG; } if (bio_integrity_add_page(bio, virt_to_page(buf), len, offset_in_page(buf)) < len) goto err_end_io; /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) blk_integrity_generate(bio); else bid->saved_bio_iter = bio->bi_iter; return true; err_free_buf: kfree(buf); err_end_io: bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return false; } EXPORT_SYMBOL(bio_integrity_prep); void blk_flush_integrity(void) { flush_workqueue(kintegrityd_wq); } static int __init blk_integrity_auto_init(void) { bid_slab = kmem_cache_create("bio_integrity_data", sizeof(struct bio_integrity_data), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); if (mempool_init_slab_pool(&bid_pool, BIO_POOL_SIZE, bid_slab)) panic("bio: can't create integrity pool\n"); /* * kintegrityd won't block much but may burn a lot of CPU cycles. * Make it highpri CPU intensive wq with max concurrency of 1. */ kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); return 0; } subsys_initcall(blk_integrity_auto_init); |
| 231 59 252 197 4 15 36 18 4 189 131 130 99 166 9 49 173 173 48 19 13 99 101 5 1 2 2 2 131 131 130 8 129 131 141 20 124 4 18 43 106 18 92 184 182 36 153 184 184 184 140 53 113 113 112 113 24 89 113 113 113 113 113 2 110 2 7 5 5 5 4 5 7 1 2 2 3 2 3 3 10 3 7 2 1 1 10 2 1 268 244 24 15 254 254 253 261 3 4 124 33 33 313 176 4 158 40 38 2 40 151 296 1 331 4 2 13 16 15 1 1 52 280 43 44 20 12 136 121 20 4 121 119 106 2 285 92 288 8 165 22 139 384 1 1 370 1 11 331 374 1 1 1 377 99 285 369 90 293 238 8 36 151 68 167 390 1 386 4 182 233 9 1 10 172 157 235 389 2 2 2 2 116 1 4 111 116 119 119 2 117 2 3 1 2 2 4 3 2 1 1 1 6 3 3 1 1 1 7 1 7 5 7 2 5 24 17 7 1 10 8 7 3 7 5 12 2 136 2 109 362 231 136 136 393 394 1 7 362 18 7 2 2 1 16 1 1 5 2 7 5 2 5 2 5 2 7 5 4 4 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_api.c Packet action API. * * Author: Jamal Hadi Salim */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_pedit.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/flow_offload.h> #include <net/tc_wrapper.h> #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); EXPORT_SYMBOL_GPL(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { #ifdef CONFIG_INET if (static_branch_unlikely(&tcf_frag_xmit_count)) return sch_frag_xmit_hook(skb, xmit); #endif return xmit(skb); } EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit); static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } static void tcf_free_cookie_rcu(struct rcu_head *p) { struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu); kfree(cookie->data); kfree(cookie); } static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, struct tc_cookie *new_cookie) { struct tc_cookie *old; old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, tcf_free_cookie_rcu); } int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **newchain, struct netlink_ext_ack *extack) { int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; u32 chain_index; if (!opcode) ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) ret = 0; if (ret) { NL_SET_ERR_MSG(extack, "invalid control action"); goto end; } if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { chain_index = action & TC_ACT_EXT_VAL_MASK; if (!tp || !newchain) { ret = -EINVAL; NL_SET_ERR_MSG(extack, "can't goto NULL proto/chain"); goto end; } *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); if (!*newchain) { ret = -ENOMEM; NL_SET_ERR_MSG(extack, "can't allocate goto_chain"); } } end: return ret; } EXPORT_SYMBOL(tcf_action_check_ctrlact); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, struct tcf_chain *goto_chain) { a->tcfa_action = action; goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1); return goto_chain; } EXPORT_SYMBOL(tcf_action_set_ctrlact); /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected * from filters. Readers later can not find us. */ static void free_tcf(struct tc_action *p) { struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->user_cookie, NULL); if (chain) tcf_chain_put_by_act(chain); kfree(p); } static void offload_action_hw_count_set(struct tc_action *act, u32 hw_count) { act->in_hw_count = hw_count; } static void offload_action_hw_count_inc(struct tc_action *act, u32 hw_count) { act->in_hw_count += hw_count; } static void offload_action_hw_count_dec(struct tc_action *act, u32 hw_count) { act->in_hw_count = act->in_hw_count > hw_count ? act->in_hw_count - hw_count : 0; } static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) { if (is_tcf_pedit(act)) return tcf_pedit_nkeys(act); else return 1; } static bool tc_act_skip_hw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; } static bool tc_act_skip_sw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; } /* SKIP_HW and SKIP_SW are mutually exclusive flags. */ static bool tc_act_flags_valid(u32 flags) { flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); } static int offload_action_init(struct flow_offload_action *fl_action, struct tc_action *act, enum offload_act_command cmd, struct netlink_ext_ack *extack) { int err; fl_action->extack = extack; fl_action->command = cmd; fl_action->index = act->tcfa_index; fl_action->cookie = (unsigned long)act; if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } return -EOPNOTSUPP; } static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act, u32 *hw_count) { int err; err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, fl_act, NULL, NULL); if (err < 0) return err; if (hw_count) *hw_count = err; return 0; } static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { int err; err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL); if (err < 0) return err; if (hw_count) *hw_count = 1; return 0; } static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { return cb ? tcf_action_offload_cmd_cb_ex(fl_act, hw_count, cb, cb_priv) : tcf_action_offload_cmd_ex(fl_act, hw_count); } static int tcf_action_offload_add_ex(struct tc_action *action, struct netlink_ext_ack *extack, flow_indr_block_bind_cb_t *cb, void *cb_priv) { bool skip_sw = tc_act_skip_sw(action->tcfa_flags); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct flow_offload_action *fl_action; u32 in_hw_count = 0; int num, err = 0; if (tc_act_skip_hw(action->tcfa_flags)) return 0; num = tcf_offload_act_num_actions_single(action); fl_action = offload_action_alloc(num); if (!fl_action) return -ENOMEM; err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack); if (err) goto fl_err; err = tc_setup_action(&fl_action->action, actions, 0, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); goto fl_err; } err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv); if (!err) cb ? offload_action_hw_count_inc(action, in_hw_count) : offload_action_hw_count_set(action, in_hw_count); if (skip_sw && !tc_act_in_hw(action)) err = -EINVAL; tc_cleanup_offload_action(&fl_action->action); fl_err: kfree(fl_action); return err; } /* offload the tc action after it is inserted */ static int tcf_action_offload_add(struct tc_action *action, struct netlink_ext_ack *extack) { return tcf_action_offload_add_ex(action, extack, NULL, NULL); } int tcf_action_update_hw_stats(struct tc_action *action) { struct flow_offload_action fl_act = {}; int err; err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL); if (!err) { preempt_disable(); tcf_action_stats_update(action, fl_act.stats.bytes, fl_act.stats.pkts, fl_act.stats.drops, fl_act.stats.lastused, true); preempt_enable(); action->used_hw_stats = fl_act.stats.used_hw_stats; action->used_hw_stats_valid = true; } else { return -EOPNOTSUPP; } return 0; } EXPORT_SYMBOL(tcf_action_update_hw_stats); static int tcf_action_offload_del_ex(struct tc_action *action, flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_offload_action fl_act = {}; u32 in_hw_count = 0; int err = 0; if (!tc_act_in_hw(action)) return 0; err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv); if (err < 0) return err; if (!cb && action->in_hw_count != in_hw_count) return -EINVAL; /* do not need to update hw state when deleting action */ if (cb && in_hw_count) offload_action_hw_count_dec(action, in_hw_count); return 0; } static int tcf_action_offload_del(struct tc_action *action) { return tcf_action_offload_del_ex(action, NULL, NULL); } static void tcf_action_cleanup(struct tc_action *p) { tcf_action_offload_del(p); if (p->ops->cleanup) p->ops->cleanup(p); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } static int __tcf_action_put(struct tc_action *p, bool bind) { struct tcf_idrinfo *idrinfo = p->idrinfo; if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) { if (bind) atomic_dec(&p->tcfa_bindcnt); idr_remove(&idrinfo->action_idr, p->tcfa_index); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); return 1; } if (bind) atomic_dec(&p->tcfa_bindcnt); return 0; } static int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; /* Release with strict==1 and bind==0 is only called through act API * interface (classifiers always bind). Only case when action with * positive reference count and zero bind count can exist is when it was * also created with act API (unbinding last classifier will destroy the * action if it was created by classifier). So only case when bind count * can be changed after initial check is when unbound action is * destroyed by act API while classifier binds to action with same id * concurrently. This result either creation of new action(same behavior * as before), or reusing existing action if concurrent process * increments reference count before action is deleted. Both scenarios * are acceptable. */ if (p) { if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (__tcf_action_put(p, bind)) ret = ACT_P_DELETED; } return ret; } int tcf_idr_release(struct tc_action *a, bool bind) { const struct tc_action_ops *ops = a->ops; int ret; ret = __tcf_idr_release(a, bind, false); if (ret == ACT_P_DELETED) module_put(ops->owner); return ret; } EXPORT_SYMBOL(tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { struct tc_cookie *user_cookie; u32 cookie_len = 0; rcu_read_lock(); user_cookie = rcu_dereference(act->user_cookie); if (user_cookie) cookie_len = nla_total_size(user_cookie->len); rcu_read_unlock(); return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) /* TCA_STATS_PKT64 */ + nla_total_size_64bit(sizeof(u64)) /* TCA_STATS_QUEUE */ + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + nla_total_size(0) /* TCA_ACT_OPTIONS nested */ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ } static size_t tcf_action_full_attrs_size(size_t sz) { return NLMSG_HDRLEN /* struct nlmsghdr */ + sizeof(struct tcamsg) + nla_total_size(0) /* TCA_ACT_TAB nested */ + sz; } static size_t tcf_action_fill_size(const struct tc_action *act) { size_t sz = tcf_action_shared_attrs_size(act); if (act->ops->get_fill_size) return act->ops->get_fill_size(act) + sz; return sz; } static int tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) { unsigned char *b = skb_tail_pointer(skb); struct tc_cookie *cookie; if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index)) goto nla_put_failure; rcu_read_lock(); cookie = rcu_dereference(a->user_cookie); if (cookie) { if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { rcu_read_unlock(); goto nla_put_failure; } } rcu_read_unlock(); return 0; nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; int err = -EINVAL; u32 flags; if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; if (a->hw_stats != TCA_ACT_HW_STATS_ANY && nla_put_bitfield32(skb, TCA_ACT_HW_STATS, a->hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; if (a->used_hw_stats_valid && nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; if (flags && nla_put_bitfield32(skb, TCA_ACT_FLAGS, flags, flags)) goto nla_put_failure; if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); if (err > 0) { nla_nest_end(skb, nest); return err; } nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, s_i = 0, n_i = 0; u32 act_flags = cb->args[2]; unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; mutex_lock(&idrinfo->lock); s_i = cb->args[0]; idr_for_each_entry_ul(idr, p, tmp, id) { index++; if (index < s_i) continue; if (IS_ERR(p)) continue; if (jiffy_since && time_after(jiffy_since, (unsigned long)p->tcfa_tm.lastuse)) continue; tcf_action_update_hw_stats(p); nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; } err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ? tcf_action_dump_terse(skb, p, true) : tcf_action_dump_1(skb, p, 0, 0); if (err < 0) { index--; nlmsg_trim(skb, nest); goto done; } nla_nest_end(skb, nest); n_i++; if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) && n_i >= TCA_ACT_MAX_PRIO) goto done; } done: if (index >= 0) cb->args[0] = index + 1; mutex_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; } return n_i; nla_put_failure: nla_nest_cancel(skb, nest); goto done; } static int tcf_idr_release_unsafe(struct tc_action *p) { if (atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (refcount_dec_and_test(&p->tcfa_refcnt)) { idr_remove(&p->idrinfo->action_idr, p->tcfa_index); tcf_action_cleanup(p); return ACT_P_DELETED; } return 0; } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; int ret = -EINVAL; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_ACT_KIND, ops->kind)) goto nla_put_failure; ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) break; n_i++; } mutex_unlock(&idrinfo->lock); if (ret < 0) { if (n_i) NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); else goto nla_put_failure; } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) goto nla_put_failure; nla_nest_end(skb, nest); return n_i; nla_put_failure: nla_nest_cancel(skb, nest); return ret; } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown command %d\n", type); NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command"); return -EINVAL; } } EXPORT_SYMBOL(tcf_generic_walker); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (IS_ERR(p)) p = NULL; else if (p) refcount_inc(&p->tcfa_refcnt); mutex_unlock(&idrinfo->lock); if (p) { *a = p; return true; } return false; } EXPORT_SYMBOL(tcf_idr_search); static int __tcf_generic_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->walk)) return ops->walk(net, skb, cb, type, ops, extack); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int __tcf_idr_search(struct net *net, const struct tc_action_ops *ops, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->lookup)) return ops->lookup(net, a, index); return tcf_idr_search(tn, a, index); } static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; int ret = 0; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (!p) { mutex_unlock(&idrinfo->lock); return -ENOENT; } if (!atomic_read(&p->tcfa_bindcnt)) { if (refcount_dec_and_test(&p->tcfa_refcnt)) { struct module *owner = p->ops->owner; WARN_ON(p != idr_remove(&idrinfo->action_idr, p->tcfa_index)); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); module_put(owner); return 0; } ret = 0; } else { ret = -EPERM; } mutex_unlock(&idrinfo->lock); return ret; } int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats, u32 flags) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); struct tcf_idrinfo *idrinfo = tn->idrinfo; int err = -ENOMEM; if (unlikely(!p)) return -ENOMEM; refcount_set(&p->tcfa_refcnt, 1); if (bind) atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats) goto err1; p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } gnet_stats_basic_sync_init(&p->tcfa_bstats); gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; p->tcfa_flags = flags; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, &p->tcfa_lock, false, est); if (err) goto err4; } p->idrinfo = idrinfo; __module_get(ops->owner); p->ops = ops; *a = p; return 0; err4: free_percpu(p->cpu_qstats); err3: free_percpu(p->cpu_bstats_hw); err2: free_percpu(p->cpu_bstats); err1: kfree(p); return err; } EXPORT_SYMBOL(tcf_idr_create); int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags) { /* Set cpustats according to actions flags. */ return tcf_idr_create(tn, index, est, a, ops, bind, !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags); } EXPORT_SYMBOL(tcf_idr_create_from_flags); /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; mutex_lock(&idrinfo->lock); /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index))); mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_cleanup); /* Check if action with specified index exists. If actions is found, increments * its reference and bind counters, and return 1. Otherwise insert temporary * error pointer (to prevent concurrent users from inserting actions with same * index) and return 0. * * May return -EAGAIN for binding actions in case of a parallel add/delete on * the requested index. */ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret; u32 max; if (*index) { rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ rcu_read_unlock(); return -EAGAIN; } if (!p) { /* Empty slot, try to allocate it */ max = *index; rcu_read_unlock(); goto new; } if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { /* Action was deleted in parallel */ rcu_read_unlock(); return -EAGAIN; } if (bind) atomic_inc(&p->tcfa_bindcnt); *a = p; rcu_read_unlock(); return 1; } else { /* Find a slot */ *index = 1; max = UINT_MAX; } new: *a = NULL; mutex_lock(&idrinfo->lock); ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, GFP_KERNEL); mutex_unlock(&idrinfo->lock); /* N binds raced for action allocation, * retry for all the ones that failed. */ if (ret == -ENOSPC && *index == max) ret = -EAGAIN; return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo) { struct idr *idr = &idrinfo->action_idr; bool mutex_taken = false; struct tc_action *p; unsigned long id = 1; unsigned long tmp; int ret; idr_for_each_entry_ul(idr, p, tmp, id) { if (tc_act_in_hw(p) && !mutex_taken) { rtnl_lock(); mutex_taken = true; } ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return; } if (mutex_taken) rtnl_unlock(); idr_destroy(&idrinfo->action_idr); } EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); /* since act ops id is stored in pernet subsystem list, * then there is no way to walk through only all the action * subsystem, so we keep tc action pernet ops id for * reoffload to walk through. */ static LIST_HEAD(act_pernet_id_list); static DEFINE_MUTEX(act_id_mutex); struct tc_act_pernet_id { struct list_head list; unsigned int id; }; static int tcf_pernet_add_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; int ret = 0; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { ret = -EEXIST; goto err_out; } } id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL); if (!id_ptr) { ret = -ENOMEM; goto err_out; } id_ptr->id = id; list_add_tail(&id_ptr->list, &act_pernet_id_list); err_out: mutex_unlock(&act_id_mutex); return ret; } static void tcf_pernet_del_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { list_del(&id_ptr->list); kfree(id_ptr); break; } } mutex_unlock(&act_id_mutex); } int tcf_register_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int ret; if (!act->act || !act->dump || !act->init) return -EINVAL; /* We have to register pernet ops before making the action ops visible, * otherwise tcf_action_init_1() could get a partially initialized * netns. */ ret = register_pernet_subsys(ops); if (ret) return ret; if (ops->id) { ret = tcf_pernet_add_id_list(*ops->id); if (ret) goto err_id; } write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { ret = -EEXIST; goto err_out; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; err_out: write_unlock(&act_mod_lock); if (ops->id) tcf_pernet_del_id_list(*ops->id); err_id: unregister_pernet_subsys(ops); return ret; } EXPORT_SYMBOL(tcf_register_action); int tcf_unregister_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int err = -ENOENT; write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (a == act) { list_del(&act->head); err = 0; break; } } write_unlock(&act_mod_lock); if (!err) { unregister_pernet_subsys(ops); if (ops->id) tcf_pernet_del_id_list(*ops->id); } return err; } EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /*TCA_ACT_MAX_PRIO is 32, there count up to 32 */ #define TCA_ACT_MAX_PRIO_MASK 0x1FF int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ int i; int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return TC_ACT_OK; restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } if (tc_act_skip_sw(a->tcfa_flags)) continue; repeat_ttl = 32; repeat: ret = tc_act(skb, a, res); if (unlikely(ret == TC_ACT_REPEAT)) { if (--repeat_ttl != 0) goto repeat; /* suspicious opcode, stop pipeline */ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); return TC_ACT_OK; } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { /* faulty opcode, stop pipeline */ return TC_ACT_OK; } else { jmp_ttl -= 1; if (jmp_ttl > 0) goto restart_act_graph; else /* faulty graph, stop pipeline */ return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); } if (ret != TC_ACT_PIPE) break; } return ret; } EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct tc_action *actions[], int bind) { const struct tc_action_ops *ops; struct tc_action *a; int ret = 0, i; tcf_act_for_each_action(i, a, actions) { actions[i] = NULL; ops = a->ops; ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return ret; } return ret; } static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); } static void tcf_action_put_many(struct tc_action *actions[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } } static void tca_put_bound_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (init_res[i] == ACT_P_CREATED) continue; if (tcf_action_put(a)) module_put(ops->owner); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { return a->ops->dump(skb, a, bind, ref); } int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse) { struct tc_action *a; int err = -EINVAL, i; struct nlattr *nest; tcf_act_for_each_action(i, a, actions) { nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; err = terse ? tcf_action_dump_terse(skb, a, false) : tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; nla_nest_end(skb, nest); } return 0; nla_put_failure: err = -EINVAL; errout: nla_nest_cancel(skb, nest); return err; } static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) { struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return NULL; c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); if (!c->data) { kfree(c); return NULL; } c->len = nla_len(tb[TCA_ACT_COOKIE]); return c; } static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr) { struct nla_bitfield32 hw_stats_bf; /* If the user did not pass the attr, that means he does * not care about the type. Return "any" in that case * which is setting on all supported types. */ if (!hw_stats_attr) return TCA_ACT_HW_STATS_ANY; hw_stats_bf = nla_get_bitfield32(hw_stats_attr); return hw_stats_bf.value; } static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; if (init_res[i] == ACT_P_BOUND) continue; idrinfo = a->idrinfo; mutex_lock(&idrinfo->lock); /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ idr_replace(&idrinfo->action_idr, a, a->tcfa_index); mutex_unlock(&idrinfo->lock); } } struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; char act_name[IFNAMSIZ]; struct nlattr *kind; int err; if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); err = -EINVAL; kind = tb[TCA_ACT_KIND]; if (!kind) { NL_SET_ERR_MSG(extack, "TC action kind must be specified"); return ERR_PTR(err); } if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(err); } } else { if (strscpy(act_name, "police", IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } } a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { #ifdef CONFIG_MODULES bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); if (rtnl_held) rtnl_unlock(); request_module(NET_ACT_ALIAS_PREFIX "%s", act_name); if (rtnl_held) rtnl_lock(); a_o = tc_lookup_action_n(act_name); /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * tell the caller to replay the request. We * indicate this using -EAGAIN. */ if (a_o != NULL) { module_put(a_o->owner); return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "Failed to load TC action module"); return ERR_PTR(-ENOENT); } return a_o; } struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action_ops *a_o, int *init_res, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nla_bitfield32 userflags = { 0, 0 }; struct tc_cookie *user_cookie = NULL; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; int err; /* backward compatibility for policer */ if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); if (tb[TCA_ACT_COOKIE]) { user_cookie = nla_memdup_cookie(tb); if (!user_cookie) { NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) { userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); if (!tc_act_flags_valid(userflags.value)) { err = -EINVAL; goto err_out; } } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); } else { err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, extack); } if (err < 0) goto err_out; *init_res = err; if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->user_cookie, user_cookie); if (!police) a->hw_stats = hw_stats; return a; err_out: if (user_cookie) { kfree(user_cookie->data); kfree(user_cookie); } return ERR_PTR(err); } static bool tc_act_bind(u32 flags) { return !!(flags & TCA_ACT_FLAGS_BIND); } /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 2]; struct tc_action *act; size_t sz = 0; int err; int i; err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO + 1, nla, NULL, extack); if (err < 0) return err; /* The nested attributes are parsed as types, but they are really an * array of actions. So we parse one more than we can handle, and return * an error if the last one is set (as that indicates that the request * contained more than the maximum number of actions). */ if (tb[TCA_ACT_MAX_PRIO + 1]) { NL_SET_ERR_MSG_FMT(extack, "Only %d actions supported per filter", TCA_ACT_MAX_PRIO); return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; a_o = tc_action_load_ops(tb[i], flags, extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; } ops[i - 1] = a_o; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; } sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; if (tc_act_bind(flags)) { bool skip_sw = tc_skip_sw(fl_flags); bool skip_hw = tc_skip_hw(fl_flags); if (tc_act_bind(act->tcfa_flags)) { /* Action is created by classifier and is not * standalone. Check that the user did not set * any action flags different than the * classifier flags, and inherit the flags from * the classifier for the compatibility case * where no flags were specified at all. */ if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) || (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) { NL_SET_ERR_MSG(extack, "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } if (skip_sw) act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW; if (skip_hw) act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW; continue; } /* Action is standalone */ if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { NL_SET_ERR_MSG(extack, "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } } else { err = tcf_action_offload_add(act, extack); if (tc_act_skip_sw(act->tcfa_flags) && err) goto err; } } /* We have to commit them all together, because if any error happened in * between, we could not handle the failure gracefully. */ tcf_idr_insert_many(actions, init_res); *attr_size = tcf_action_full_attrs_size(sz); err = i - 1; goto err_mod; err: tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++) module_put(ops[i]->owner); return err; } void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), bytes, packets); return; } _bstats_update(&a->tcfa_bstats, bytes, packets); atomic_add(drops, &a->tcfa_drops); if (hw) _bstats_update(&a->tcfa_bstats_hw, bytes, packets); } EXPORT_SYMBOL(tcf_action_update_stats); int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, int compat_mode) { struct gnet_stats_queue qstats = {0}; struct gnet_dump d; int err = 0; if (p == NULL) goto errout; /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. */ if (compat_mode) { if (p->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, TCA_STATS, TCA_XSTATS, &p->tcfa_lock, &d, TCA_PAD); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, &p->tcfa_lock, &d, TCA_ACT_PAD); if (err < 0) goto errout; qstats.drops = atomic_read(&p->tcfa_drops); qstats.overlimits = atomic_read(&p->tcfa_overlimits); if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfa_bstats, false) < 0 || gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &qstats, qstats.qlen) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) goto errout; return 0; errout: return -1; } static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], u32 portid, u32 seq, u16 flags, int event, int bind, int ref, struct netlink_ext_ack *extack) { struct tcamsg *t; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; if (extack && extack->_msg && nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *actions[], int event, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 1, NULL) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } return rtnl_unicast(skb, net, portid); } static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; struct tc_action *a; int index; int err; err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) goto err_out; err = -EINVAL; if (tb[TCA_ACT_INDEX] == NULL || nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) { NL_SET_ERR_MSG(extack, "Invalid TC action index value"); goto err_out; } index = nla_get_u32(tb[TCA_ACT_INDEX]); err = -EINVAL; ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (!ops) { /* could happen in batch of actions */ NL_SET_ERR_MSG(extack, "Specified TC action kind not found"); goto err_out; } err = -ENOENT; if (__tcf_idr_search(net, ops, &a, index) == 0) { NL_SET_ERR_MSG(extack, "TC action with specified index not found"); goto err_mod; } module_put(ops->owner); return a; err_mod: module_put(ops->owner); err_out: return ERR_PTR(err); } static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, struct netlink_ext_ack *extack) { struct sk_buff *skb; unsigned char *b; struct nlmsghdr *nlh; struct tcamsg *t; struct netlink_callback dcb; struct nlattr *nest; struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; struct nlattr *kind; int err = -ENOMEM; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return err; b = skb_tail_pointer(skb); err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; ops = tc_lookup_action(kind); if (!ops) { /*some idjot trying to flush unknown action */ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action"); goto err_out; } nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) { NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification"); goto out_module_put; } t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) { NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; } err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack); if (err <= 0) { nla_nest_cancel(skb, nest); goto out_module_put; } nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; nlh->nlmsg_flags |= NLM_F_ROOT; module_put(ops->owner); err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); return err; out_module_put: module_put(ops->owner); err_out: kfree_skb(skb); return err; } static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. */ struct tcf_idrinfo *idrinfo = a->idrinfo; u32 act_index = a->tcfa_index; actions[i] = NULL; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); } else { int ret; /* now do the delete */ ret = tcf_idr_delete_index(idrinfo, act_index); if (ret < 0) return ret; } } return 0; } static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net, struct tc_action *action) { size_t attr_size = tcf_action_fill_size(action); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) { kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action) { const struct tc_action_ops *ops = action->ops; struct sk_buff *skb; int ret; if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_reoffload_del_notify_msg(net, action); if (IS_ERR(skb)) return PTR_ERR(skb); } ret = tcf_idr_release_unsafe(action); if (ret == ACT_P_DELETED) { module_put(ops->owner); ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0); } else { kfree_skb(skb); } return ret; } int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, void *cb_priv, bool add) { struct tc_act_pernet_id *id_ptr; struct tcf_idrinfo *idrinfo; struct tc_action_net *tn; struct tc_action *p; unsigned int act_id; unsigned long tmp; unsigned long id; struct idr *idr; struct net *net; int ret; if (!cb) return -EINVAL; down_read(&net_rwsem); mutex_lock(&act_id_mutex); for_each_net(net) { list_for_each_entry(id_ptr, &act_pernet_id_list, list) { act_id = id_ptr->id; tn = net_generic(net, act_id); if (!tn) continue; idrinfo = tn->idrinfo; if (!idrinfo) continue; mutex_lock(&idrinfo->lock); idr = &idrinfo->action_idr; idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p) || tc_act_bind(p->tcfa_flags)) continue; if (add) { tcf_action_offload_add_ex(p, NULL, cb, cb_priv); continue; } /* cb unregister to update hw count */ ret = tcf_action_offload_del_ex(p, cb, cb_priv); if (ret < 0) continue; if (tc_act_skip_sw(p->tcfa_flags) && !tc_act_in_hw(p)) tcf_reoffload_del_notify(net, p); } mutex_unlock(&idrinfo->lock); } } mutex_unlock(&act_id_mutex); up_read(&net_rwsem); return 0; } static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 2, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; int ret; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_del_notify_msg(net, n, actions, portid, attr_size, extack); if (IS_ERR(skb)) return PTR_ERR(skb); } /* now do the delete */ ret = tcf_action_delete(net, actions); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); return ret; } return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int event, struct netlink_ext_ack *extack) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t attr_size = 0; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) return ret; if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1]) return tca_action_flush(net, tb[1], n, portid, extack); NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action"); return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_get_1(net, tb[i], n, portid, extack); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; } attr_size += tcf_action_fill_size(act); actions[i - 1] = act; } attr_size = tcf_action_full_attrs_size(attr_size); if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, actions, event, extack); else { /* delete */ ret = tcf_del_notify(net, n, actions, portid, attr_size, extack); if (ret) goto err; return 0; } err: tcf_action_put_many(actions); return ret; } static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_add_notify_msg(net, n, actions, portid, attr_size, extack); if (IS_ERR(skb)) return PTR_ERR(skb); } return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, u32 flags, struct netlink_ext_ack *extack) { size_t attr_size = 0; int loop, ret; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, &attr_size, flags, 0, extack); if (ret != -EAGAIN) break; } if (ret < 0) return ret; ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); /* only put bound actions */ tca_put_bound_many(actions, init_res); return ret; } static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON | TCA_ACT_FLAG_TERSE_DUMP), [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, }; static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = NETLINK_CB(skb).portid; u32 flags = 0; int ret = 0; if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; if (tca[TCA_ACT_TAB] == NULL) { NL_SET_ERR_MSG(extack, "Netlink action attributes missing"); return -EINVAL; } /* n->nlmsg_flags & NLM_F_CREATE */ switch (n->nlmsg_type) { case RTM_NEWACTION: /* we are going to assume all other flags * imply create only if it doesn't exist * Note that CREATE | EXCL implies that * but since we want avoid ambiguity (eg when flags * is zero) then just set this */ if (n->nlmsg_flags & NLM_F_REPLACE) flags = TCA_ACT_FLAGS_REPLACE; ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags, extack); break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, portid, RTM_DELACTION, extack); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, portid, RTM_GETACTION, extack); break; default: BUG(); } return ret; } static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct nlattr *kind; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; return kind; } static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); struct nlattr *tb[TCA_ROOT_MAX + 1]; struct nlattr *count_attr = NULL; unsigned long jiffy_since = 0; struct nlattr *kind = NULL; struct nla_bitfield32 bf; u32 msecs_since = 0; u32 act_count = 0; ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, tcaa_policy, cb->extack); if (ret < 0) return ret; kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; } a_o = tc_lookup_action(kind); if (a_o == NULL) return 0; cb->args[2] = 0; if (tb[TCA_ROOT_FLAGS]) { bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); cb->args[2] = bf.value; } if (tb[TCA_ROOT_TIME_DELTA]) { msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; if (msecs_since) jiffy_since = jiffies - msecs_to_jiffies(msecs_since); t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; cb->args[3] = jiffy_since; count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); if (!count_attr) goto out_module_put; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (nest == NULL) goto out_module_put; ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; act_count = cb->args[1]; memcpy(nla_data(count_attr), &act_count, sizeof(u32)); cb->args[1] = 0; } else nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; out_module_put: module_put(a_o->owner); nlmsg_trim(skb, b); return skb->len; } static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action}, {.msgtype = RTM_DELACTION, .doit = tc_ctl_action}, {.msgtype = RTM_GETACTION, .doit = tc_ctl_action, .dumpit = tc_dump_action}, }; static int __init tc_action_init(void) { rtnl_register_many(tc_action_rtnl_msg_handlers); return 0; } subsys_initcall(tc_action_init); |
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef CEPH_MSGR_H #define CEPH_MSGR_H /* * Data types for message passing layer used by Ceph. */ #define CEPH_MON_PORT 6789 /* default monitor port */ /* * tcp connection banner. include a protocol version. and adjust * whenever the wire protocol changes. try to keep this string length * constant. */ #define CEPH_BANNER "ceph v027" #define CEPH_BANNER_LEN 9 #define CEPH_BANNER_MAX_LEN 30 /* * messenger V2 connection banner prefix. * The full banner string should have the form: "ceph v2\n<le16>" * the 2 bytes are the length of the remaining banner. */ #define CEPH_BANNER_V2 "ceph v2\n" #define CEPH_BANNER_V2_LEN 8 #define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16)) /* * messenger V2 features */ #define CEPH_MSGR2_INCARNATION_1 (0ull) #define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); #define HAVE_MSGR2_FEATURE(x, name) \ (((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name)) DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1 #define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) #define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) /* * Rollover-safe type and comparator for 32-bit sequence numbers. * Comparator returns -1, 0, or 1. */ typedef __u32 ceph_seq_t; static inline __s32 ceph_seq_cmp(__u32 a, __u32 b) { return (__s32)a - (__s32)b; } /* * entity_name -- logical name for a process participating in the * network, e.g. 'mds0' or 'osd3'. */ struct ceph_entity_name { __u8 type; /* CEPH_ENTITY_TYPE_* */ __le64 num; } __attribute__ ((packed)); #define CEPH_ENTITY_TYPE_MON 0x01 #define CEPH_ENTITY_TYPE_MDS 0x02 #define CEPH_ENTITY_TYPE_OSD 0x04 #define CEPH_ENTITY_TYPE_CLIENT 0x08 #define CEPH_ENTITY_TYPE_AUTH 0x20 #define CEPH_ENTITY_TYPE_ANY 0xFF extern const char *ceph_entity_type_name(int type); /* * entity_addr -- network address */ struct ceph_entity_addr { __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */ __le32 nonce; /* unique id for process (e.g. pid) */ struct sockaddr_storage in_addr; } __attribute__ ((packed)); static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs, const struct ceph_entity_addr *rhs) { return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) && lhs->nonce == rhs->nonce; } struct ceph_entity_inst { struct ceph_entity_name name; struct ceph_entity_addr addr; } __attribute__ ((packed)); /* used by message exchange protocol */ #define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ #define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ #define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing incoming connection */ #define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again with higher cseq */ #define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again with higher gseq */ #define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ #define CEPH_MSGR_TAG_MSG 7 /* message */ #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ #define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ /* * connection negotiation */ struct ceph_msg_connect { __le64 features; /* supported feature bits */ __le32 host_type; /* CEPH_ENTITY_TYPE_* */ __le32 global_seq; /* count connections initiated by this host */ __le32 connect_seq; /* count connections initiated in this session */ __le32 protocol_version; __le32 authorizer_protocol; __le32 authorizer_len; __u8 flags; /* CEPH_MSG_CONNECT_* */ } __attribute__ ((packed)); struct ceph_msg_connect_reply { __u8 tag; __le64 features; /* feature bits for this session */ __le32 global_seq; __le32 connect_seq; __le32 protocol_version; __le32 authorizer_len; __u8 flags; } __attribute__ ((packed)); #define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ /* * message header */ struct ceph_msg_header_old { __le64 seq; /* message seq# for this session */ __le64 tid; /* transaction id */ __le16 type; /* message type */ __le16 priority; /* priority. higher value == higher priority */ __le16 version; /* version of message encoding */ __le32 front_len; /* bytes in main payload */ __le32 middle_len;/* bytes in middle payload */ __le32 data_len; /* bytes of data payload */ __le16 data_off; /* sender: include full offset; receiver: mask against ~PAGE_MASK */ struct ceph_entity_inst src, orig_src; __le32 reserved; __le32 crc; /* header crc32c */ } __attribute__ ((packed)); struct ceph_msg_header { __le64 seq; /* message seq# for this session */ __le64 tid; /* transaction id */ __le16 type; /* message type */ __le16 priority; /* priority. higher value == higher priority */ __le16 version; /* version of message encoding */ __le32 front_len; /* bytes in main payload */ __le32 middle_len;/* bytes in middle payload */ __le32 data_len; /* bytes of data payload */ __le16 data_off; /* sender: include full offset; receiver: mask against ~PAGE_MASK */ struct ceph_entity_name src; __le16 compat_version; __le16 reserved; __le32 crc; /* header crc32c */ } __attribute__ ((packed)); struct ceph_msg_header2 { __le64 seq; /* message seq# for this session */ __le64 tid; /* transaction id */ __le16 type; /* message type */ __le16 priority; /* priority. higher value == higher priority */ __le16 version; /* version of message encoding */ __le32 data_pre_padding_len; __le16 data_off; /* sender: include full offset; receiver: mask against ~PAGE_MASK */ __le64 ack_seq; __u8 flags; /* oldest code we think can decode this. unknown if zero. */ __le16 compat_version; __le16 reserved; } __attribute__ ((packed)); #define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_HIGH 196 #define CEPH_MSG_PRIO_HIGHEST 255 /* * follows data payload */ struct ceph_msg_footer_old { __le32 front_crc, middle_crc, data_crc; __u8 flags; } __attribute__ ((packed)); struct ceph_msg_footer { __le32 front_crc, middle_crc, data_crc; // sig holds the 64 bits of the digital signature for the message PLR __le64 sig; __u8 flags; } __attribute__ ((packed)); #define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */ #define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ #define CEPH_MSG_FOOTER_SIGNED (1<<2) /* msg was signed */ #endif |
| 43 43 1 1 1 43 43 43 43 43 43 43 43 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sunrpc/cache.c * * Generic code for various authentication-related caches * used by sunrpc clients and servers. * * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> */ #include <linux/types.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/net.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/pagemap.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <trace/events/sunrpc.h> #include "netns.h" #include "fail.h" #define RPCDBG_FACILITY RPCDBG_CACHE static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h, struct cache_detail *detail) { time64_t now = seconds_since_boot(); INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; if (now <= detail->flush_time) /* ensure it isn't already expired */ now = detail->flush_time + 1; h->last_refresh = now; } static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, struct cache_head *key, int hash) { struct hlist_head *head = &detail->hash_table[hash]; struct cache_head *tmp; rcu_read_lock(); hlist_for_each_entry_rcu(tmp, head, cache_list) { if (!detail->match(tmp, key)) continue; if (test_bit(CACHE_VALID, &tmp->flags) && cache_is_expired(detail, tmp)) continue; tmp = cache_get_rcu(tmp); rcu_read_unlock(); return tmp; } rcu_read_unlock(); return NULL; } static void sunrpc_begin_cache_remove_entry(struct cache_head *ch, struct cache_detail *cd) { /* Must be called under cd->hash_lock */ hlist_del_init_rcu(&ch->cache_list); set_bit(CACHE_CLEANED, &ch->flags); cd->entries --; } static void sunrpc_end_cache_remove_entry(struct cache_head *ch, struct cache_detail *cd) { cache_fresh_unlocked(ch, cd); cache_put(ch, cd); } static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head *new, *tmp, *freeme = NULL; struct hlist_head *head = &detail->hash_table[hash]; new = detail->alloc(); if (!new) return NULL; /* must fully initialise 'new', else * we might get lose if we need to * cache_put it soon. */ cache_init(new, detail); detail->init(new, key); spin_lock(&detail->hash_lock); /* check if entry appeared while we slept */ hlist_for_each_entry_rcu(tmp, head, cache_list, lockdep_is_held(&detail->hash_lock)) { if (!detail->match(tmp, key)) continue; if (test_bit(CACHE_VALID, &tmp->flags) && cache_is_expired(detail, tmp)) { sunrpc_begin_cache_remove_entry(tmp, detail); trace_cache_entry_expired(detail, tmp); freeme = tmp; break; } cache_get(tmp); spin_unlock(&detail->hash_lock); cache_put(new, detail); return tmp; } hlist_add_head_rcu(&new->cache_list, head); detail->entries++; if (detail->nextcheck > new->expiry_time) detail->nextcheck = new->expiry_time + 1; cache_get(new); spin_unlock(&detail->hash_lock); if (freeme) sunrpc_end_cache_remove_entry(freeme, detail); return new; } struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head *ret; ret = sunrpc_cache_find_rcu(detail, key, hash); if (ret) return ret; /* Didn't find anything, insert an empty entry */ return sunrpc_cache_add_entry(detail, key, hash); } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time64_t expiry, struct cache_detail *detail) { time64_t now = seconds_since_boot(); if (now <= detail->flush_time) /* ensure it isn't immediately treated as expired */ now = detail->flush_time + 1; head->expiry_time = expiry; head->last_refresh = now; smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail) { if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { cache_revisit_request(head); cache_dequeue(detail, head); } } static void cache_make_negative(struct cache_detail *detail, struct cache_head *h) { set_bit(CACHE_NEGATIVE, &h->flags); trace_cache_entry_make_negative(detail, h); } static void cache_entry_update(struct cache_detail *detail, struct cache_head *h, struct cache_head *new) { if (!test_bit(CACHE_NEGATIVE, &new->flags)) { detail->update(h, new); trace_cache_entry_update(detail, h); } else { cache_make_negative(detail, h); } } struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash) { /* The 'old' entry is to be replaced by 'new'. * If 'old' is not VALID, we update it directly, * otherwise we need to replace it */ struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { spin_lock(&detail->hash_lock); if (!test_bit(CACHE_VALID, &old->flags)) { cache_entry_update(detail, old, new); cache_fresh_locked(old, new->expiry_time, detail); spin_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; } spin_unlock(&detail->hash_lock); } /* We need to insert a new entry */ tmp = detail->alloc(); if (!tmp) { cache_put(old, detail); return NULL; } cache_init(tmp, detail); detail->init(tmp, old); spin_lock(&detail->hash_lock); cache_entry_update(detail, tmp, new); hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time, detail); cache_fresh_locked(old, 0, detail); spin_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); cache_put(old, detail); return tmp; } EXPORT_SYMBOL_GPL(sunrpc_cache_update); static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; else { /* entry is valid */ if (test_bit(CACHE_NEGATIVE, &h->flags)) return -ENOENT; else { /* * In combination with write barrier in * sunrpc_cache_update, ensures that anyone * using the cache entry after this sees the * updated contents: */ smp_rmb(); return 0; } } } static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h) { int rv; spin_lock(&detail->hash_lock); rv = cache_is_valid(h); if (rv == -EAGAIN) { cache_make_negative(detail, h); cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, detail); rv = -ENOENT; } spin_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); return rv; } int cache_check_rcu(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp) { int rv; time64_t refresh_age, age; /* First decide return status as best we can */ rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; } else if (rv == -EAGAIN || (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%lld, age=%lld\n", refresh_age, age); switch (detail->cache_upcall(detail, h)) { case -EINVAL: rv = try_to_negate_entry(detail, h); break; case -EAGAIN: cache_fresh_unlocked(h, detail); break; } } if (rv == -EAGAIN) { if (!cache_defer_req(rqstp, h)) { /* * Request was not deferred; handle it as best * we can ourselves: */ rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } } return rv; } EXPORT_SYMBOL_GPL(cache_check_rcu); /* * This is the generic cache management routine for all * the authentication caches. * It checks the currency of a cache item and will (later) * initiate an upcall to fill it if needed. * * * Returns 0 if the cache_head can be used, or cache_puts it and returns * -EAGAIN if upcall is pending and request has been queued * -ETIMEDOUT if upcall failed or request could not be queue or * upcall completed but item is still invalid (implying that * the cache item has been replaced with a newer one). * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp) { int rv; rv = cache_check_rcu(detail, h, rqstp); if (rv) cache_put(h, detail); return rv; } EXPORT_SYMBOL_GPL(cache_check); /* * caches need to be periodically cleaned. * For this we maintain a list of cache_detail and * a current pointer into that list and into the table * for that entry. * * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * * An entry gets removed if: * - The expiry is before current time * - The last_refresh time is before the flush_time for that cache * * later we might drop old entries with non-NEVER expiry if that table * is getting 'full' for some definition of 'full' * * The question of "how often to scan a table" is an interesting one * and is answered in part by the use of the "nextcheck" field in the * cache_detail. * When a scan of a table begins, the nextcheck field is set to a time * that is well into the future. * While scanning, if an expiry time is found that is earlier than the * current nextcheck time, nextcheck is set to that expiry time. * If the flush_time is ever set to a time earlier than the nextcheck * time, the nextcheck time is then set to that flush_time. * * A table is then only scanned if the current time is at least * the nextcheck time. * */ static LIST_HEAD(cache_list); static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; atomic_set(&cd->writers, 0); cd->last_close = 0; cd->last_warn = -1; list_add(&cd->others, &cache_list); spin_unlock(&cache_list_lock); /* start the cleaning process */ queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0); } EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); spin_lock(&cd->hash_lock); if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); spin_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); } } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); /* clean cache tries to find something to clean * and cleans it. * It returns 1 if it cleaned something, * 0 if it didn't find anything this time * -1 if it fell off the end of the list. */ static int cache_clean(void) { int rv = 0; struct list_head *next; spin_lock(&cache_list_lock); /* find a suitable table if we don't already have one */ while (current_detail == NULL || current_index >= current_detail->hash_size) { if (current_detail) next = current_detail->others.next; else next = cache_list.next; if (next == &cache_list) { current_detail = NULL; spin_unlock(&cache_list_lock); return -1; } current_detail = list_entry(next, struct cache_detail, others); if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; current_detail->nextcheck = seconds_since_boot()+30*60; } } spin_lock(¤t_detail->hash_lock); /* find a non-empty bucket in the table */ while (current_index < current_detail->hash_size && hlist_empty(¤t_detail->hash_table[current_index])) current_index++; /* find a cleanable entry in the bucket and clean it, or set to next bucket */ if (current_index < current_detail->hash_size) { struct cache_head *ch = NULL; struct cache_detail *d; struct hlist_head *head; struct hlist_node *tmp; /* Ok, now to clean this strand */ head = ¤t_detail->hash_table[current_index]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (!cache_is_expired(current_detail, ch)) continue; sunrpc_begin_cache_remove_entry(ch, current_detail); trace_cache_entry_expired(current_detail, ch); rv = 1; break; } spin_unlock(¤t_detail->hash_lock); d = current_detail; if (!ch) current_index ++; spin_unlock(&cache_list_lock); if (ch) sunrpc_end_cache_remove_entry(ch, d); } else { spin_unlock(¤t_detail->hash_lock); spin_unlock(&cache_list_lock); } return rv; } /* * We want to regularly clean the cache, so we need to schedule some work ... */ static void do_cache_clean(struct work_struct *work) { int delay; if (list_empty(&cache_list)) return; if (cache_clean() == -1) delay = round_jiffies_relative(30*HZ); else delay = 5; queue_delayed_work(system_power_efficient_wq, &cache_cleaner, delay); } /* * Clean all caches promptly. This just calls cache_clean * repeatedly until we are sure that every cache has had a chance to * be fully cleaned */ void cache_flush(void) { while (cache_clean() != -1) cond_resched(); while (cache_clean() != -1) cond_resched(); } EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { struct cache_head *ch = NULL; struct hlist_head *head = NULL; int i = 0; spin_lock(&detail->hash_lock); if (!detail->entries) { spin_unlock(&detail->hash_lock); return; } dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; while (!hlist_empty(head)) { ch = hlist_entry(head->first, struct cache_head, cache_list); sunrpc_begin_cache_remove_entry(ch, detail); spin_unlock(&detail->hash_lock); sunrpc_end_cache_remove_entry(ch, detail); spin_lock(&detail->hash_lock); } } spin_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); /* * Deferral and Revisiting of Requests. * * If a cache lookup finds a pending entry, we * need to defer the request and revisit it later. * All deferred requests are stored in a hash table, * indexed by "struct cache_head *". * As it may be wasteful to store a whole request * structure, we allow the request to provide a * deferred form, which must contain a * 'struct cache_deferred_req' * This cache_deferred_req contains a method to allow * it to be revisited when cache info is available */ #define DFR_HASHSIZE (PAGE_SIZE/sizeof(struct list_head)) #define DFR_HASH(item) ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE) #define DFR_MAX 300 /* ??? */ static DEFINE_SPINLOCK(cache_defer_lock); static LIST_HEAD(cache_defer_list); static struct hlist_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; static void __unhash_deferred_req(struct cache_deferred_req *dreq) { hlist_del_init(&dreq->hash); if (!list_empty(&dreq->recent)) { list_del_init(&dreq->recent); cache_defer_cnt--; } } static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) { int hash = DFR_HASH(item); INIT_LIST_HEAD(&dreq->recent); hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); } static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item, int count_me) { dreq->item = item; spin_lock(&cache_defer_lock); __hash_deferred_req(dreq, item); if (count_me) { cache_defer_cnt++; list_add(&dreq->recent, &cache_defer_list); } spin_unlock(&cache_defer_lock); } struct thread_deferred_req { struct cache_deferred_req handle; struct completion completion; }; static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) { struct thread_deferred_req *dr = container_of(dreq, struct thread_deferred_req, handle); complete(&dr->completion); } static void cache_wait_req(struct cache_req *req, struct cache_head *item) { struct thread_deferred_req sleeper; struct cache_deferred_req *dreq = &sleeper.handle; sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); dreq->revisit = cache_restart_thread; setup_deferral(dreq, item, 0); if (!test_bit(CACHE_PENDING, &item->flags) || wait_for_completion_interruptible_timeout( &sleeper.completion, req->thread_wait) <= 0) { /* The completion wasn't completed, so we need * to clean up */ spin_lock(&cache_defer_lock); if (!hlist_unhashed(&sleeper.handle.hash)) { __unhash_deferred_req(&sleeper.handle); spin_unlock(&cache_defer_lock); } else { /* cache_revisit_request already removed * this from the hash table, but hasn't * called ->revisit yet. It will very soon * and we need to wait for it. */ spin_unlock(&cache_defer_lock); wait_for_completion(&sleeper.completion); } } } static void cache_limit_defers(void) { /* Make sure we haven't exceed the limit of allowed deferred * requests. */ struct cache_deferred_req *discard = NULL; if (cache_defer_cnt <= DFR_MAX) return; spin_lock(&cache_defer_lock); /* Consider removing either the first or the last */ if (cache_defer_cnt > DFR_MAX) { if (get_random_u32_below(2)) discard = list_entry(cache_defer_list.next, struct cache_deferred_req, recent); else discard = list_entry(cache_defer_list.prev, struct cache_deferred_req, recent); __unhash_deferred_req(discard); } spin_unlock(&cache_defer_lock); if (discard) discard->revisit(discard, 1); } #if IS_ENABLED(CONFIG_FAIL_SUNRPC) static inline bool cache_defer_immediately(void) { return !fail_sunrpc.ignore_cache_wait && should_fail(&fail_sunrpc.attr, 1); } #else static inline bool cache_defer_immediately(void) { return false; } #endif /* Return true if and only if a deferred request is queued. */ static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; if (!cache_defer_immediately()) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) return false; } dreq = req->defer(req); if (dreq == NULL) return false; setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to * set up the deferral, so need to revisit just in case */ cache_revisit_request(item); cache_limit_defers(); return true; } static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct hlist_node *tmp; int hash = DFR_HASH(item); LIST_HEAD(pending); spin_lock(&cache_defer_lock); hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); } spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { dreq = list_entry(pending.next, struct cache_deferred_req, recent); list_del_init(&dreq->recent); dreq->revisit(dreq, 0); } } void cache_clean_deferred(void *owner) { struct cache_deferred_req *dreq, *tmp; LIST_HEAD(pending); spin_lock(&cache_defer_lock); list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { if (dreq->owner == owner) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); } } spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { dreq = list_entry(pending.next, struct cache_deferred_req, recent); list_del_init(&dreq->recent); dreq->revisit(dreq, 1); } } /* * communicate with user-space * * We have a magic /proc file - /proc/net/rpc/<cachename>/channel. * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. * * Implemented by linked list of requests. Each open file has * a ->private that also exists in this list. New requests are added * to the end and may wakeup and preceding readers. * New readers are added to the head. If, on read, an item is found with * CACHE_UPCALLING clear, we free it from the list. * */ static DEFINE_SPINLOCK(queue_lock); struct cache_queue { struct list_head list; int reader; /* if 0, then request */ }; struct cache_request { struct cache_queue q; struct cache_head *item; char * buf; int len; int readers; }; struct cache_reader { struct cache_queue q; int offset; /* if non-0, we have a refcnt on next request */ }; static int cache_request(struct cache_detail *detail, struct cache_request *crq) { char *bp = crq->buf; int len = PAGE_SIZE; detail->cache_request(detail, crq->item, &bp, &len); if (len < 0) return -E2BIG; return PAGE_SIZE - len; } static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; struct inode *inode = file_inode(filp); int err; if (count == 0) return 0; inode_lock(inode); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); /* need to find next request */ while (rp->q.list.next != &cd->queue && list_entry(rp->q.list.next, struct cache_queue, list) ->reader) { struct list_head *next = rp->q.list.next; list_move(&rp->q.list, next); } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); inode_unlock(inode); WARN_ON_ONCE(rp->offset); return 0; } rq = container_of(rp->q.list.next, struct cache_request, q.list); WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; spin_unlock(&queue_lock); if (rq->len == 0) { err = cache_request(cd, rq); if (err < 0) goto out; rq->len = err; } if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); list_move(&rp->q.list, &rq->q.list); spin_unlock(&queue_lock); } else { if (rp->offset + count > rq->len) count = rq->len - rp->offset; err = -EFAULT; if (copy_to_user(buf, rq->buf + rp->offset, count)) goto out; rp->offset += count; if (rp->offset >= rq->len) { rp->offset = 0; spin_lock(&queue_lock); list_move(&rp->q.list, &rq->q.list); spin_unlock(&queue_lock); } err = 0; } out: if (rp->offset == 0) { /* need to release rq */ spin_lock(&queue_lock); rq->readers--; if (rq->readers == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); spin_unlock(&queue_lock); cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else spin_unlock(&queue_lock); } if (err == -EAGAIN) goto again; inode_unlock(inode); return err ? err : count; } static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, size_t count, struct cache_detail *cd) { ssize_t ret; if (count == 0) return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; ret = cd->cache_parse(cd, kaddr, count); if (!ret) ret = count; return ret; } static ssize_t cache_downcall(struct address_space *mapping, const char __user *buf, size_t count, struct cache_detail *cd) { char *write_buf; ssize_t ret = -ENOMEM; if (count >= 32768) { /* 32k is max userland buffer, lets check anyway */ ret = -EINVAL; goto out; } write_buf = kvmalloc(count + 1, GFP_KERNEL); if (!write_buf) goto out; ret = cache_do_downcall(write_buf, buf, count, cd); kvfree(write_buf); out: return ret; } static ssize_t cache_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; struct inode *inode = file_inode(filp); ssize_t ret = -EINVAL; if (!cd->cache_parse) goto out; inode_lock(inode); ret = cache_downcall(mapping, buf, count, cd); inode_unlock(inode); out: return ret; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); static __poll_t cache_poll(struct file *filp, poll_table *wait, struct cache_detail *cd) { __poll_t mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; poll_wait(filp, &queue_wait, wait); /* alway allow write */ mask = EPOLLOUT | EPOLLWRNORM; if (!rp) return mask; spin_lock(&queue_lock); for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { mask |= EPOLLIN | EPOLLRDNORM; break; } spin_unlock(&queue_lock); return mask; } static int cache_ioctl(struct inode *ino, struct file *filp, unsigned int cmd, unsigned long arg, struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; if (cmd != FIONREAD || !rp) return -EINVAL; spin_lock(&queue_lock); /* only find the length remaining in current request, * or the length of the next request */ for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { struct cache_request *cr = container_of(cq, struct cache_request, q); len = cr->len - rp->offset; break; } spin_unlock(&queue_lock); return put_user(len, (int __user *)arg); } static int cache_open(struct inode *inode, struct file *filp, struct cache_detail *cd) { struct cache_reader *rp = NULL; if (!cd || !try_module_get(cd->owner)) return -EACCES; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) { module_put(cd->owner); return -ENOMEM; } rp->offset = 0; rp->q.reader = 1; spin_lock(&queue_lock); list_add(&rp->q.list, &cd->queue); spin_unlock(&queue_lock); } if (filp->f_mode & FMODE_WRITE) atomic_inc(&cd->writers); filp->private_data = rp; return 0; } static int cache_release(struct inode *inode, struct file *filp, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; if (rp) { spin_lock(&queue_lock); if (rp->offset) { struct cache_queue *cq; for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { container_of(cq, struct cache_request, q) ->readers--; break; } rp->offset = 0; } list_del(&rp->q.list); spin_unlock(&queue_lock); filp->private_data = NULL; kfree(rp); } if (filp->f_mode & FMODE_WRITE) { atomic_dec(&cd->writers); cd->last_close = seconds_since_boot(); } module_put(cd->owner); return 0; } static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq, *tmp; struct cache_request *cr; LIST_HEAD(dequeued); spin_lock(&queue_lock); list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; if (test_bit(CACHE_PENDING, &ch->flags)) /* Lost a race and it is pending again */ break; if (cr->readers != 0) continue; list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); while (!list_empty(&dequeued)) { cr = list_entry(dequeued.next, struct cache_request, q.list); list_del(&cr->q.list); cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); } } /* * Support routines for text-based upcalls. * Fields are separated by spaces. * Fields are either mangled to quote space tab newline slosh with slosh * or a hexified with a leading \x * Record is terminated with newline. * */ void qword_add(char **bpp, int *lp, char *str) { char *bp = *bpp; int len = *lp; int ret; if (len < 0) return; ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t"); if (ret >= len) { bp += len; len = -1; } else { bp += ret; len -= ret; *bp++ = ' '; len--; } *bpp = bp; *lp = len; } EXPORT_SYMBOL_GPL(qword_add); void qword_addhex(char **bpp, int *lp, char *buf, int blen) { char *bp = *bpp; int len = *lp; if (len < 0) return; if (len > 2) { *bp++ = '\\'; *bp++ = 'x'; len -= 2; while (blen && len >= 2) { bp = hex_byte_pack(bp, *buf++); len -= 2; blen--; } } if (blen || len<1) len = -1; else { *bp++ = ' '; len--; } *bpp = bp; *lp = len; } EXPORT_SYMBOL_GPL(qword_addhex); static void warn_no_listener(struct cache_detail *detail) { if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) detail->warn_no_listener(detail, detail->last_close != 0); } } static bool cache_listeners_exist(struct cache_detail *detail) { if (atomic_read(&detail->writers)) return true; if (detail->last_close == 0) /* This cache was never opened */ return false; if (detail->last_close < seconds_since_boot() - 30) /* * We allow for the possibility that someone might * restart a userspace daemon without restarting the * server; but after 30 seconds, we give up. */ return false; return true; } /* * register an upcall request to user-space and queue it up for read() by the * upcall daemon. * * Each request is at most one page long. */ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; int ret = 0; if (test_bit(CACHE_CLEANED, &h->flags)) /* Too late to make an upcall */ return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -EAGAIN; crq = kmalloc(sizeof (*crq), GFP_KERNEL); if (!crq) { kfree(buf); return -EAGAIN; } crq->q.reader = 0; crq->buf = buf; crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); if (test_bit(CACHE_PENDING, &h->flags)) { crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); trace_cache_entry_upcall(detail, h); } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&queue_lock); wake_up(&queue_wait); if (ret == -EAGAIN) { kfree(buf); kfree(crq); } return ret; } int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { if (test_and_set_bit(CACHE_PENDING, &h->flags)) return 0; return cache_pipe_upcall(detail, h); } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); int sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, struct cache_head *h) { if (!cache_listeners_exist(detail)) { warn_no_listener(detail); trace_cache_entry_no_listener(detail, h); return -EINVAL; } return sunrpc_cache_pipe_upcall(detail, h); } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall_timeout); /* * parse a message from user-space and pass it * to an appropriate cache * Messages are, like requests, separated into fields by * spaces and dequotes as \xHEXSTRING or embedded \nnn octal * * Message is * reply cachename expiry key ... content.... * * key and content are both parsed by cache */ int qword_get(char **bpp, char *dest, int bufsize) { /* return bytes copied, or -1 on error */ char *bp = *bpp; int len = 0; while (*bp == ' ') bp++; if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); if (h < 0) break; l = hex_to_bin(bp[1]); if (l < 0) break; *dest++ = (h << 4) | l; bp += 2; len++; } } else { /* text with \nnn octal quoting */ while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) { if (*bp == '\\' && isodigit(bp[1]) && (bp[1] <= '3') && isodigit(bp[2]) && isodigit(bp[3])) { int byte = (*++bp -'0'); bp++; byte = (byte << 3) | (*bp++ - '0'); byte = (byte << 3) | (*bp++ - '0'); *dest++ = byte; len++; } else { *dest++ = *bp++; len++; } } } if (*bp != ' ' && *bp != '\n' && *bp != '\0') return -1; while (*bp == ' ') bp++; *bpp = bp; *dest = '\0'; return len; } EXPORT_SYMBOL_GPL(qword_get); /* * support /proc/net/rpc/$CACHENAME/content * as a seqfile. * We call ->cache_show passing NULL for the item to * get a header, then pass each real item in the cache */ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; struct cache_detail *cd = m->private; if (!n--) return SEQ_START_TOKEN; hash = n >> 32; entry = n & ((1LL<<32) - 1); hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); do { hash++; n += 1LL<<32; } while(hash < cd->hash_size && hlist_empty(&cd->hash_table[hash])); if (hash >= cd->hash_size) return NULL; *pos = n+1; return hlist_entry_safe(rcu_dereference_raw( hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) hash = 0; else if (ch->cache_list.next == NULL) { hash++; *pos += 1LL<<32; } else { ++*pos; return hlist_entry_safe(rcu_dereference_raw( hlist_next_rcu(&ch->cache_list)), struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); while (hash < cd->hash_size && hlist_empty(&cd->hash_table[hash])) { hash++; *pos += 1LL<<32; } if (hash >= cd->hash_size) return NULL; ++*pos; return hlist_entry_safe(rcu_dereference_raw( hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return __cache_seq_start(m, pos); } EXPORT_SYMBOL_GPL(cache_seq_start_rcu); void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos) { return cache_seq_next(file, p, pos); } EXPORT_SYMBOL_GPL(cache_seq_next_rcu); void cache_seq_stop_rcu(struct seq_file *m, void *p) __releases(RCU) { rcu_read_unlock(); } EXPORT_SYMBOL_GPL(cache_seq_stop_rcu); static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) return cd->cache_show(m, cd, NULL); ifdebug(CACHE) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); if (cache_check_rcu(cd, cp, NULL)) seq_puts(m, "# "); else if (cache_is_expired(cd, cp)) seq_puts(m, "# "); return cd->cache_show(m, cd, cp); } static const struct seq_operations cache_content_op = { .start = cache_seq_start_rcu, .next = cache_seq_next_rcu, .stop = cache_seq_stop_rcu, .show = c_show, }; static int content_open(struct inode *inode, struct file *file, struct cache_detail *cd) { struct seq_file *seq; int err; if (!cd || !try_module_get(cd->owner)) return -EACCES; err = seq_open(file, &cache_content_op); if (err) { module_put(cd->owner); return err; } seq = file->private_data; seq->private = cd; return 0; } static int content_release(struct inode *inode, struct file *file, struct cache_detail *cd) { int ret = seq_release(inode, file); module_put(cd->owner); return ret; } static int open_flush(struct inode *inode, struct file *file, struct cache_detail *cd) { if (!cd || !try_module_get(cd->owner)) return -EACCES; return nonseekable_open(inode, file); } static int release_flush(struct inode *inode, struct file *file, struct cache_detail *cd) { module_put(cd->owner); return 0; } static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { char tbuf[22]; size_t len; len = snprintf(tbuf, sizeof(tbuf), "%llu\n", convert_to_wallclock(cd->flush_time)); return simple_read_from_buffer(buf, count, ppos, tbuf, len); } static ssize_t write_flush(struct file *file, const char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { char tbuf[20]; char *ep; time64_t now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; /* Note that while we check that 'buf' holds a valid number, * we always ignore the value and just flush everything. * Making use of the number leads to races. */ now = seconds_since_boot(); /* Always flush everything, so behave like cache_purge() * Do this by advancing flush_time to the current time, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. */ if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; cd->nextcheck = now; cache_flush(); if (cd->flush) cd->flush(); *ppos += count; return count; } static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_read(filp, buf, count, ppos, cd); } static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_poll(filp, wait, cd); } static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct cache_detail *cd = pde_data(inode); return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return cache_open(inode, filp, cd); } static int cache_release_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return cache_release(inode, filp, cd); } static const struct proc_ops cache_channel_proc_ops = { .proc_read = cache_read_procfs, .proc_write = cache_write_procfs, .proc_poll = cache_poll_procfs, .proc_ioctl = cache_ioctl_procfs, /* for FIONREAD */ .proc_open = cache_open_procfs, .proc_release = cache_release_procfs, }; static int content_open_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return content_open(inode, filp, cd); } static int content_release_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return content_release(inode, filp, cd); } static const struct proc_ops content_proc_ops = { .proc_open = content_open_procfs, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = content_release_procfs, }; static int open_flush_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return open_flush(inode, filp, cd); } static int release_flush_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return release_flush(inode, filp, cd); } static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return read_flush(filp, buf, count, ppos, cd); } static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return write_flush(filp, buf, count, ppos, cd); } static const struct proc_ops cache_flush_proc_ops = { .proc_open = open_flush_procfs, .proc_read = read_flush_procfs, .proc_write = write_flush_procfs, .proc_release = release_flush_procfs, }; static void remove_cache_proc_entries(struct cache_detail *cd) { if (cd->procfs) { proc_remove(cd->procfs); cd->procfs = NULL; } } static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; struct sunrpc_net *sn; if (!IS_ENABLED(CONFIG_PROC_FS)) return 0; sn = net_generic(net, sunrpc_net_id); cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->procfs == NULL) goto out_nomem; p = proc_create_data("flush", S_IFREG | 0600, cd->procfs, &cache_flush_proc_ops, cd); if (p == NULL) goto out_nomem; if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG | 0600, cd->procfs, &cache_channel_proc_ops, cd); if (p == NULL) goto out_nomem; } if (cd->cache_show) { p = proc_create_data("content", S_IFREG | 0400, cd->procfs, &content_proc_ops, cd); if (p == NULL) goto out_nomem; } return 0; out_nomem: remove_cache_proc_entries(cd); return -ENOMEM; } void __init cache_initialize(void) { INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean); } int cache_register_net(struct cache_detail *cd, struct net *net) { int ret; sunrpc_init_cache_detail(cd); ret = create_cache_proc_entries(cd, net); if (ret) sunrpc_destroy_cache_detail(cd); return ret; } EXPORT_SYMBOL_GPL(cache_register_net); void cache_unregister_net(struct cache_detail *cd, struct net *net) { remove_cache_proc_entries(cd); sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister_net); struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; int i; cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); cd->hash_table = kcalloc(cd->hash_size, sizeof(struct hlist_head), GFP_KERNEL); if (cd->hash_table == NULL) { kfree(cd); return ERR_PTR(-ENOMEM); } for (i = 0; i < cd->hash_size; i++) INIT_HLIST_HEAD(&cd->hash_table[i]); cd->net = net; return cd; } EXPORT_SYMBOL_GPL(cache_create_net); void cache_destroy_net(struct cache_detail *cd, struct net *net) { kfree(cd->hash_table); kfree(cd); } EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_read(filp, buf, count, ppos, cd); } static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_poll(filp, wait, cd); } static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct cache_detail *cd = RPC_I(inode)->private; return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return cache_open(inode, filp, cd); } static int cache_release_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return cache_release(inode, filp, cd); } const struct file_operations cache_file_operations_pipefs = { .owner = THIS_MODULE, .read = cache_read_pipefs, .write = cache_write_pipefs, .poll = cache_poll_pipefs, .unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */ .open = cache_open_pipefs, .release = cache_release_pipefs, }; static int content_open_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return content_open(inode, filp, cd); } static int content_release_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return content_release(inode, filp, cd); } const struct file_operations content_file_operations_pipefs = { .open = content_open_pipefs, .read = seq_read, .llseek = seq_lseek, .release = content_release_pipefs, }; static int open_flush_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return open_flush(inode, filp, cd); } static int release_flush_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return release_flush(inode, filp, cd); } static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return read_flush(filp, buf, count, ppos, cd); } static ssize_t write_flush_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return write_flush(filp, buf, count, ppos, cd); } const struct file_operations cache_flush_operations_pipefs = { .open = open_flush_pipefs, .read = read_flush_pipefs, .write = write_flush_pipefs, .release = release_flush_pipefs, }; int sunrpc_cache_register_pipefs(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); if (IS_ERR(dir)) return PTR_ERR(dir); cd->pipefs = dir; return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { if (cd->pipefs) { rpc_remove_cache_dir(cd->pipefs); cd->pipefs = NULL; } } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ sunrpc_begin_cache_remove_entry(h, cd); spin_unlock(&cd->hash_lock); sunrpc_end_cache_remove_entry(h, cd); } else spin_unlock(&cd->hash_lock); } EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); |
| 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 8 8 1 1 1 1 1 1 10 10 4 8 8 1 1 3 10 2 2 1 2 10 10 10 10 10 10 10 2 2 2 10 12 1 2 10 10 10 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 | // SPDX-License-Identifier: GPL-2.0-only /* * Driver for the VoIP USB phones with CM109 chipsets. * * Copyright (C) 2007 - 2008 Alfred E. Heggestad <aeh@db.org> */ /* * Tested devices: * - Komunikate KIP1000 * - Genius G-talk * - Allied-Telesis Corega USBPH01 * - ... * * This driver is based on the yealink.c driver * * Thanks to: * - Authors of yealink.c * - Thomas Reitmayr * - Oliver Neukum for good review comments and code * - Shaun Jackman <sjackman@gmail.com> for Genius G-talk keymap * - Dmitry Torokhov for valuable input and review * * Todo: * - Read/write EEPROM */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/rwsem.h> #include <linux/usb/input.h> #define DRIVER_VERSION "20080805" #define DRIVER_AUTHOR "Alfred E. Heggestad" #define DRIVER_DESC "CM109 phone driver" static char *phone = "kip1000"; module_param(phone, charp, S_IRUSR); MODULE_PARM_DESC(phone, "Phone name {kip1000, gtalk, usbph01, atcom}"); enum { /* HID Registers */ HID_IR0 = 0x00, /* Record/Playback-mute button, Volume up/down */ HID_IR1 = 0x01, /* GPI, generic registers or EEPROM_DATA0 */ HID_IR2 = 0x02, /* Generic registers or EEPROM_DATA1 */ HID_IR3 = 0x03, /* Generic registers or EEPROM_CTRL */ HID_OR0 = 0x00, /* Mapping control, buzzer, SPDIF (offset 0x04) */ HID_OR1 = 0x01, /* GPO - General Purpose Output */ HID_OR2 = 0x02, /* Set GPIO to input/output mode */ HID_OR3 = 0x03, /* SPDIF status channel or EEPROM_CTRL */ /* HID_IR0 */ RECORD_MUTE = 1 << 3, PLAYBACK_MUTE = 1 << 2, VOLUME_DOWN = 1 << 1, VOLUME_UP = 1 << 0, /* HID_OR0 */ /* bits 7-6 0: HID_OR1-2 are used for GPO; HID_OR0, 3 are used for buzzer and SPDIF 1: HID_OR0-3 are used as generic HID registers 2: Values written to HID_OR0-3 are also mapped to MCU_CTRL, EEPROM_DATA0-1, EEPROM_CTRL (see Note) 3: Reserved */ HID_OR_GPO_BUZ_SPDIF = 0 << 6, HID_OR_GENERIC_HID_REG = 1 << 6, HID_OR_MAP_MCU_EEPROM = 2 << 6, BUZZER_ON = 1 << 5, /* up to 256 normal keys, up to 15 special key combinations */ KEYMAP_SIZE = 256 + 15, }; /* CM109 protocol packet */ struct cm109_ctl_packet { u8 byte[4]; } __attribute__ ((packed)); enum { USB_PKT_LEN = sizeof(struct cm109_ctl_packet) }; /* CM109 device structure */ struct cm109_dev { struct input_dev *idev; /* input device */ struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* irq input channel */ struct cm109_ctl_packet *irq_data; dma_addr_t irq_dma; struct urb *urb_irq; /* control output channel */ struct cm109_ctl_packet *ctl_data; dma_addr_t ctl_dma; struct usb_ctrlrequest *ctl_req; struct urb *urb_ctl; /* * The 3 bitfields below are protected by ctl_submit_lock. * They have to be separate since they are accessed from IRQ * context. */ unsigned irq_urb_pending:1; /* irq_urb is in flight */ unsigned ctl_urb_pending:1; /* ctl_urb is in flight */ unsigned buzzer_pending:1; /* need to issue buzz command */ spinlock_t ctl_submit_lock; unsigned char buzzer_state; /* on/off */ /* flags */ unsigned open:1; unsigned resetting:1; unsigned shutdown:1; /* This mutex protects writes to the above flags */ struct mutex pm_mutex; unsigned short keymap[KEYMAP_SIZE]; char phys[64]; /* physical device path */ int key_code; /* last reported key */ int keybit; /* 0=new scan 1,2,4,8=scan columns */ u8 gpi; /* Cached value of GPI (high nibble) */ }; /****************************************************************************** * CM109 key interface *****************************************************************************/ static unsigned short special_keymap(int code) { if (code > 0xff) { switch (code - 0xff) { case RECORD_MUTE: return KEY_MICMUTE; case PLAYBACK_MUTE: return KEY_MUTE; case VOLUME_DOWN: return KEY_VOLUMEDOWN; case VOLUME_UP: return KEY_VOLUMEUP; } } return KEY_RESERVED; } /* Map device buttons to internal key events. * * The "up" and "down" keys, are symbolised by arrows on the button. * The "pickup" and "hangup" keys are symbolised by a green and red phone * on the button. Komunikate KIP1000 Keyboard Matrix -> -- 1 -- 2 -- 3 --> GPI pin 4 (0x10) | | | | <- -- 4 -- 5 -- 6 --> GPI pin 5 (0x20) | | | | END - 7 -- 8 -- 9 --> GPI pin 6 (0x40) | | | | OK -- * -- 0 -- # --> GPI pin 7 (0x80) | | | | /|\ /|\ /|\ /|\ | | | | GPO pin: 3 2 1 0 0x8 0x4 0x2 0x1 */ static unsigned short keymap_kip1000(int scancode) { switch (scancode) { /* phone key: */ case 0x82: return KEY_NUMERIC_0; /* 0 */ case 0x14: return KEY_NUMERIC_1; /* 1 */ case 0x12: return KEY_NUMERIC_2; /* 2 */ case 0x11: return KEY_NUMERIC_3; /* 3 */ case 0x24: return KEY_NUMERIC_4; /* 4 */ case 0x22: return KEY_NUMERIC_5; /* 5 */ case 0x21: return KEY_NUMERIC_6; /* 6 */ case 0x44: return KEY_NUMERIC_7; /* 7 */ case 0x42: return KEY_NUMERIC_8; /* 8 */ case 0x41: return KEY_NUMERIC_9; /* 9 */ case 0x81: return KEY_NUMERIC_POUND; /* # */ case 0x84: return KEY_NUMERIC_STAR; /* * */ case 0x88: return KEY_ENTER; /* pickup */ case 0x48: return KEY_ESC; /* hangup */ case 0x28: return KEY_LEFT; /* IN */ case 0x18: return KEY_RIGHT; /* OUT */ default: return special_keymap(scancode); } } /* Contributed by Shaun Jackman <sjackman@gmail.com> Genius G-Talk keyboard matrix 0 1 2 3 4: 0 4 8 Talk 5: 1 5 9 End 6: 2 6 # Up 7: 3 7 * Down */ static unsigned short keymap_gtalk(int scancode) { switch (scancode) { case 0x11: return KEY_NUMERIC_0; case 0x21: return KEY_NUMERIC_1; case 0x41: return KEY_NUMERIC_2; case 0x81: return KEY_NUMERIC_3; case 0x12: return KEY_NUMERIC_4; case 0x22: return KEY_NUMERIC_5; case 0x42: return KEY_NUMERIC_6; case 0x82: return KEY_NUMERIC_7; case 0x14: return KEY_NUMERIC_8; case 0x24: return KEY_NUMERIC_9; case 0x44: return KEY_NUMERIC_POUND; /* # */ case 0x84: return KEY_NUMERIC_STAR; /* * */ case 0x18: return KEY_ENTER; /* Talk (green handset) */ case 0x28: return KEY_ESC; /* End (red handset) */ case 0x48: return KEY_UP; /* Menu up (rocker switch) */ case 0x88: return KEY_DOWN; /* Menu down (rocker switch) */ default: return special_keymap(scancode); } } /* * Keymap for Allied-Telesis Corega USBPH01 * http://www.alliedtelesis-corega.com/2/1344/1437/1360/chprd.html * * Contributed by july@nat.bg */ static unsigned short keymap_usbph01(int scancode) { switch (scancode) { case 0x11: return KEY_NUMERIC_0; /* 0 */ case 0x21: return KEY_NUMERIC_1; /* 1 */ case 0x41: return KEY_NUMERIC_2; /* 2 */ case 0x81: return KEY_NUMERIC_3; /* 3 */ case 0x12: return KEY_NUMERIC_4; /* 4 */ case 0x22: return KEY_NUMERIC_5; /* 5 */ case 0x42: return KEY_NUMERIC_6; /* 6 */ case 0x82: return KEY_NUMERIC_7; /* 7 */ case 0x14: return KEY_NUMERIC_8; /* 8 */ case 0x24: return KEY_NUMERIC_9; /* 9 */ case 0x44: return KEY_NUMERIC_POUND; /* # */ case 0x84: return KEY_NUMERIC_STAR; /* * */ case 0x18: return KEY_ENTER; /* pickup */ case 0x28: return KEY_ESC; /* hangup */ case 0x48: return KEY_LEFT; /* IN */ case 0x88: return KEY_RIGHT; /* OUT */ default: return special_keymap(scancode); } } /* * Keymap for ATCom AU-100 * http://www.atcom.cn/products.html * http://www.packetizer.com/products/au100/ * http://www.voip-info.org/wiki/view/AU-100 * * Contributed by daniel@gimpelevich.san-francisco.ca.us */ static unsigned short keymap_atcom(int scancode) { switch (scancode) { /* phone key: */ case 0x82: return KEY_NUMERIC_0; /* 0 */ case 0x11: return KEY_NUMERIC_1; /* 1 */ case 0x12: return KEY_NUMERIC_2; /* 2 */ case 0x14: return KEY_NUMERIC_3; /* 3 */ case 0x21: return KEY_NUMERIC_4; /* 4 */ case 0x22: return KEY_NUMERIC_5; /* 5 */ case 0x24: return KEY_NUMERIC_6; /* 6 */ case 0x41: return KEY_NUMERIC_7; /* 7 */ case 0x42: return KEY_NUMERIC_8; /* 8 */ case 0x44: return KEY_NUMERIC_9; /* 9 */ case 0x84: return KEY_NUMERIC_POUND; /* # */ case 0x81: return KEY_NUMERIC_STAR; /* * */ case 0x18: return KEY_ENTER; /* pickup */ case 0x28: return KEY_ESC; /* hangup */ case 0x48: return KEY_LEFT; /* left arrow */ case 0x88: return KEY_RIGHT; /* right arrow */ default: return special_keymap(scancode); } } static unsigned short (*keymap)(int) = keymap_kip1000; /* * Completes a request by converting the data into events for the * input subsystem. */ static void report_key(struct cm109_dev *dev, int key) { struct input_dev *idev = dev->idev; if (dev->key_code >= 0) { /* old key up */ input_report_key(idev, dev->key_code, 0); } dev->key_code = key; if (key >= 0) { /* new valid key */ input_report_key(idev, key, 1); } input_sync(idev); } /* * Converts data of special key presses (volume, mute) into events * for the input subsystem, sends press-n-release for mute keys. */ static void cm109_report_special(struct cm109_dev *dev) { static const u8 autorelease = RECORD_MUTE | PLAYBACK_MUTE; struct input_dev *idev = dev->idev; u8 data = dev->irq_data->byte[HID_IR0]; unsigned short keycode; int i; for (i = 0; i < 4; i++) { keycode = dev->keymap[0xff + BIT(i)]; if (keycode == KEY_RESERVED) continue; input_report_key(idev, keycode, data & BIT(i)); if (data & autorelease & BIT(i)) { input_sync(idev); input_report_key(idev, keycode, 0); } } input_sync(idev); } /****************************************************************************** * CM109 usb communication interface *****************************************************************************/ static void cm109_submit_buzz_toggle(struct cm109_dev *dev) { int error; if (dev->buzzer_state) dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; else dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; error = usb_submit_urb(dev->urb_ctl, GFP_ATOMIC); if (error) dev_err(&dev->intf->dev, "%s: usb_submit_urb (urb_ctl) failed %d\n", __func__, error); } static void cm109_submit_ctl(struct cm109_dev *dev) { int error; guard(spinlock_irqsave)(&dev->ctl_submit_lock); dev->irq_urb_pending = 0; if (unlikely(dev->shutdown)) return; if (dev->buzzer_state) dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; else dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; dev->ctl_data->byte[HID_OR1] = dev->keybit; dev->ctl_data->byte[HID_OR2] = dev->keybit; dev->buzzer_pending = 0; dev->ctl_urb_pending = 1; error = usb_submit_urb(dev->urb_ctl, GFP_ATOMIC); if (error) dev_err(&dev->intf->dev, "%s: usb_submit_urb (urb_ctl) failed %d\n", __func__, error); } /* * IRQ handler */ static void cm109_urb_irq_callback(struct urb *urb) { struct cm109_dev *dev = urb->context; const int status = urb->status; dev_dbg(&dev->intf->dev, "### URB IRQ: [0x%02x 0x%02x 0x%02x 0x%02x] keybit=0x%02x\n", dev->irq_data->byte[0], dev->irq_data->byte[1], dev->irq_data->byte[2], dev->irq_data->byte[3], dev->keybit); if (status) { if (status == -ESHUTDOWN) return; dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", __func__, status); goto out; } /* Special keys */ cm109_report_special(dev); /* Scan key column */ if (dev->keybit == 0xf) { /* Any changes ? */ if ((dev->gpi & 0xf0) == (dev->irq_data->byte[HID_IR1] & 0xf0)) goto out; dev->gpi = dev->irq_data->byte[HID_IR1] & 0xf0; dev->keybit = 0x1; } else { report_key(dev, dev->keymap[dev->irq_data->byte[HID_IR1]]); dev->keybit <<= 1; if (dev->keybit > 0x8) dev->keybit = 0xf; } out: cm109_submit_ctl(dev); } static void cm109_urb_ctl_callback(struct urb *urb) { struct cm109_dev *dev = urb->context; const int status = urb->status; int error; dev_dbg(&dev->intf->dev, "### URB CTL: [0x%02x 0x%02x 0x%02x 0x%02x]\n", dev->ctl_data->byte[0], dev->ctl_data->byte[1], dev->ctl_data->byte[2], dev->ctl_data->byte[3]); if (status) { if (status == -ESHUTDOWN) return; dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", __func__, status); } guard(spinlock_irqsave)(&dev->ctl_submit_lock); dev->ctl_urb_pending = 0; if (unlikely(dev->shutdown)) return; if (dev->buzzer_pending || status) { dev->buzzer_pending = 0; dev->ctl_urb_pending = 1; cm109_submit_buzz_toggle(dev); } else if (likely(!dev->irq_urb_pending)) { /* ask for key data */ dev->irq_urb_pending = 1; error = usb_submit_urb(dev->urb_irq, GFP_ATOMIC); if (error) dev_err(&dev->intf->dev, "%s: usb_submit_urb (urb_irq) failed %d\n", __func__, error); } } static void cm109_toggle_buzzer_async(struct cm109_dev *dev) { guard(spinlock_irqsave)(&dev->ctl_submit_lock); if (dev->ctl_urb_pending) { /* URB completion will resubmit */ dev->buzzer_pending = 1; } else { dev->ctl_urb_pending = 1; cm109_submit_buzz_toggle(dev); } } static void cm109_toggle_buzzer_sync(struct cm109_dev *dev, int on) { int error; if (on) dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; else dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; error = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), dev->ctl_req->bRequest, dev->ctl_req->bRequestType, le16_to_cpu(dev->ctl_req->wValue), le16_to_cpu(dev->ctl_req->wIndex), dev->ctl_data, USB_PKT_LEN, USB_CTRL_SET_TIMEOUT); if (error < 0 && error != -EINTR) dev_err(&dev->intf->dev, "%s: usb_control_msg() failed %d\n", __func__, error); } static void cm109_stop_traffic(struct cm109_dev *dev) { dev->shutdown = 1; /* * Make sure other CPUs see this */ smp_wmb(); usb_kill_urb(dev->urb_ctl); usb_kill_urb(dev->urb_irq); cm109_toggle_buzzer_sync(dev, 0); dev->shutdown = 0; smp_wmb(); } static void cm109_restore_state(struct cm109_dev *dev) { if (dev->open) { /* * Restore buzzer state. * This will also kick regular URB submission */ cm109_toggle_buzzer_async(dev); } } /****************************************************************************** * input event interface *****************************************************************************/ static int cm109_input_open(struct input_dev *idev) { struct cm109_dev *dev = input_get_drvdata(idev); int error; error = usb_autopm_get_interface(dev->intf); if (error < 0) { dev_err(&idev->dev, "%s - cannot autoresume, result %d\n", __func__, error); return error; } scoped_guard(mutex, &dev->pm_mutex) { dev->buzzer_state = 0; dev->key_code = -1; /* no keys pressed */ dev->keybit = 0xf; /* issue INIT */ dev->ctl_data->byte[HID_OR0] = HID_OR_GPO_BUZ_SPDIF; dev->ctl_data->byte[HID_OR1] = dev->keybit; dev->ctl_data->byte[HID_OR2] = dev->keybit; dev->ctl_data->byte[HID_OR3] = 0x00; dev->ctl_urb_pending = 1; error = usb_submit_urb(dev->urb_ctl, GFP_KERNEL); if (!error) { dev->open = 1; return 0; } } dev->ctl_urb_pending = 0; usb_autopm_put_interface(dev->intf); dev_err(&dev->intf->dev, "%s: usb_submit_urb (urb_ctl) failed %d\n", __func__, error); return error; } static void cm109_input_close(struct input_dev *idev) { struct cm109_dev *dev = input_get_drvdata(idev); scoped_guard(mutex, &dev->pm_mutex) { /* * Once we are here event delivery is stopped so we * don't need to worry about someone starting buzzer * again */ cm109_stop_traffic(dev); dev->open = 0; } usb_autopm_put_interface(dev->intf); } static int cm109_input_ev(struct input_dev *idev, unsigned int type, unsigned int code, int value) { struct cm109_dev *dev = input_get_drvdata(idev); dev_dbg(&dev->intf->dev, "input_ev: type=%u code=%u value=%d\n", type, code, value); if (type != EV_SND) return -EINVAL; switch (code) { case SND_TONE: case SND_BELL: dev->buzzer_state = !!value; if (!dev->resetting) cm109_toggle_buzzer_async(dev); return 0; default: return -EINVAL; } } /****************************************************************************** * Linux interface and usb initialisation *****************************************************************************/ struct driver_info { char *name; }; static const struct driver_info info_cm109 = { .name = "CM109 USB driver", }; enum { VENDOR_ID = 0x0d8c, /* C-Media Electronics */ PRODUCT_ID_CM109 = 0x000e, /* CM109 defines range 0x0008 - 0x000f */ }; /* table of devices that work with this driver */ static const struct usb_device_id cm109_usb_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = VENDOR_ID, .idProduct = PRODUCT_ID_CM109, .bInterfaceClass = USB_CLASS_HID, .bInterfaceSubClass = 0, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t) &info_cm109 }, /* you can add more devices here with product ID 0x0008 - 0x000f */ { } }; static void cm109_usb_cleanup(struct cm109_dev *dev) { kfree(dev->ctl_req); usb_free_coherent(dev->udev, USB_PKT_LEN, dev->ctl_data, dev->ctl_dma); usb_free_coherent(dev->udev, USB_PKT_LEN, dev->irq_data, dev->irq_dma); usb_free_urb(dev->urb_irq); /* parameter validation in core/urb */ usb_free_urb(dev->urb_ctl); /* parameter validation in core/urb */ kfree(dev); } static void cm109_usb_disconnect(struct usb_interface *interface) { struct cm109_dev *dev = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); input_unregister_device(dev->idev); cm109_usb_cleanup(dev); } static int cm109_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct driver_info *nfo = (struct driver_info *)id->driver_info; struct usb_host_interface *interface; struct usb_endpoint_descriptor *endpoint; struct cm109_dev *dev; struct input_dev *input_dev = NULL; int ret, pipe, i; int error = -ENOMEM; interface = intf->cur_altsetting; if (interface->desc.bNumEndpoints < 1) return -ENODEV; endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; spin_lock_init(&dev->ctl_submit_lock); mutex_init(&dev->pm_mutex); dev->udev = udev; dev->intf = intf; dev->idev = input_dev = input_allocate_device(); if (!input_dev) goto err_out; /* allocate usb buffers */ dev->irq_data = usb_alloc_coherent(udev, USB_PKT_LEN, GFP_KERNEL, &dev->irq_dma); if (!dev->irq_data) goto err_out; dev->ctl_data = usb_alloc_coherent(udev, USB_PKT_LEN, GFP_KERNEL, &dev->ctl_dma); if (!dev->ctl_data) goto err_out; dev->ctl_req = kmalloc(sizeof(*(dev->ctl_req)), GFP_KERNEL); if (!dev->ctl_req) goto err_out; /* allocate urb structures */ dev->urb_irq = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb_irq) goto err_out; dev->urb_ctl = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb_ctl) goto err_out; /* get a handle to the interrupt data pipe */ pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); ret = usb_maxpacket(udev, pipe); if (ret != USB_PKT_LEN) dev_err(&intf->dev, "invalid payload size %d, expected %d\n", ret, USB_PKT_LEN); /* initialise irq urb */ usb_fill_int_urb(dev->urb_irq, udev, pipe, dev->irq_data, USB_PKT_LEN, cm109_urb_irq_callback, dev, endpoint->bInterval); dev->urb_irq->transfer_dma = dev->irq_dma; dev->urb_irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; dev->urb_irq->dev = udev; /* initialise ctl urb */ dev->ctl_req->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT; dev->ctl_req->bRequest = USB_REQ_SET_CONFIGURATION; dev->ctl_req->wValue = cpu_to_le16(0x200); dev->ctl_req->wIndex = cpu_to_le16(interface->desc.bInterfaceNumber); dev->ctl_req->wLength = cpu_to_le16(USB_PKT_LEN); usb_fill_control_urb(dev->urb_ctl, udev, usb_sndctrlpipe(udev, 0), (void *)dev->ctl_req, dev->ctl_data, USB_PKT_LEN, cm109_urb_ctl_callback, dev); dev->urb_ctl->transfer_dma = dev->ctl_dma; dev->urb_ctl->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; dev->urb_ctl->dev = udev; /* find out the physical bus location */ usb_make_path(udev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); /* register settings for the input device */ input_dev->name = nfo->name; input_dev->phys = dev->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, dev); input_dev->open = cm109_input_open; input_dev->close = cm109_input_close; input_dev->event = cm109_input_ev; input_dev->keycode = dev->keymap; input_dev->keycodesize = sizeof(unsigned char); input_dev->keycodemax = ARRAY_SIZE(dev->keymap); input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_SND); input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE); /* register available key events */ for (i = 0; i < KEYMAP_SIZE; i++) { unsigned short k = keymap(i); dev->keymap[i] = k; __set_bit(k, input_dev->keybit); } __clear_bit(KEY_RESERVED, input_dev->keybit); error = input_register_device(dev->idev); if (error) goto err_out; usb_set_intfdata(intf, dev); return 0; err_out: input_free_device(input_dev); cm109_usb_cleanup(dev); return error; } static int cm109_usb_suspend(struct usb_interface *intf, pm_message_t message) { struct cm109_dev *dev = usb_get_intfdata(intf); dev_info(&intf->dev, "cm109: usb_suspend (event=%d)\n", message.event); guard(mutex)(&dev->pm_mutex); cm109_stop_traffic(dev); return 0; } static int cm109_usb_resume(struct usb_interface *intf) { struct cm109_dev *dev = usb_get_intfdata(intf); dev_info(&intf->dev, "cm109: usb_resume\n"); guard(mutex)(&dev->pm_mutex); cm109_restore_state(dev); return 0; } static int cm109_usb_pre_reset(struct usb_interface *intf) { struct cm109_dev *dev = usb_get_intfdata(intf); mutex_lock(&dev->pm_mutex); /* * Make sure input events don't try to toggle buzzer * while we are resetting */ dev->resetting = 1; smp_wmb(); cm109_stop_traffic(dev); return 0; } static int cm109_usb_post_reset(struct usb_interface *intf) { struct cm109_dev *dev = usb_get_intfdata(intf); dev->resetting = 0; smp_wmb(); cm109_restore_state(dev); mutex_unlock(&dev->pm_mutex); return 0; } static struct usb_driver cm109_driver = { .name = "cm109", .probe = cm109_usb_probe, .disconnect = cm109_usb_disconnect, .suspend = cm109_usb_suspend, .resume = cm109_usb_resume, .reset_resume = cm109_usb_resume, .pre_reset = cm109_usb_pre_reset, .post_reset = cm109_usb_post_reset, .id_table = cm109_usb_table, .supports_autosuspend = 1, }; static int __init cm109_select_keymap(void) { /* Load the phone keymap */ if (!strcasecmp(phone, "kip1000")) { keymap = keymap_kip1000; printk(KERN_INFO KBUILD_MODNAME ": " "Keymap for Komunikate KIP1000 phone loaded\n"); } else if (!strcasecmp(phone, "gtalk")) { keymap = keymap_gtalk; printk(KERN_INFO KBUILD_MODNAME ": " "Keymap for Genius G-talk phone loaded\n"); } else if (!strcasecmp(phone, "usbph01")) { keymap = keymap_usbph01; printk(KERN_INFO KBUILD_MODNAME ": " "Keymap for Allied-Telesis Corega USBPH01 phone loaded\n"); } else if (!strcasecmp(phone, "atcom")) { keymap = keymap_atcom; printk(KERN_INFO KBUILD_MODNAME ": " "Keymap for ATCom AU-100 phone loaded\n"); } else { printk(KERN_ERR KBUILD_MODNAME ": " "Unsupported phone: %s\n", phone); return -EINVAL; } return 0; } static int __init cm109_init(void) { int err; err = cm109_select_keymap(); if (err) return err; err = usb_register(&cm109_driver); if (err) return err; printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_DESC ": " DRIVER_VERSION " (C) " DRIVER_AUTHOR "\n"); return 0; } static void __exit cm109_exit(void) { usb_deregister(&cm109_driver); } module_init(cm109_init); module_exit(cm109_exit); MODULE_DEVICE_TABLE(usb, cm109_usb_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); |
| 1 1 1 1 1 1 1 1 1 1 2 1 3 3 3 3 1 11 11 11 11 11 11 8 3 1 6 10 8 10 10 10 10 6 11 11 6 11 11 4 4 7 7 7 6 13 11 2 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/ptp_clock_kernel.h> #include <net/netdev_lock.h> #include "netlink.h" #include "common.h" #include "bitset.h" #include "ts.h" struct tsinfo_req_info { struct ethnl_req_info base; struct hwtstamp_provider_desc hwprov_desc; }; struct tsinfo_reply_data { struct ethnl_reply_data base; struct kernel_ethtool_ts_info ts_info; struct ethtool_ts_stats stats; }; #define TSINFO_REQINFO(__req_base) \ container_of(__req_base, struct tsinfo_req_info, base) #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) #define ETHTOOL_TS_STAT_CNT \ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = { [ETHTOOL_A_TSINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), [ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] = NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), }; int ts_parse_hwtst_provider(const struct nlattr *nest, struct hwtstamp_provider_desc *hwprov_desc, struct netlink_ext_ack *extack, bool *mod) { struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)]; int ret; ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1, nest, ethnl_ts_hwtst_prov_policy, extack); if (ret < 0) return ret; if (NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) || NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER)) return -EINVAL; ethnl_update_u32(&hwprov_desc->index, tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX], mod); ethnl_update_u32(&hwprov_desc->qualifier, tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER], mod); return 0; } static int tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, struct netlink_ext_ack *extack) { struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); bool mod = false; req->hwprov_desc.index = -1; if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) return 0; return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], &req->hwprov_desc, extack, &mod); } static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; if (req->hwprov_desc.index != -1) { ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info, &req->hwprov_desc); ethnl_ops_complete(dev); return ret; } if (req_base->flags & ETHTOOL_FLAG_STATS) { ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); if (dev->ethtool_ops->get_ts_stats) dev->ethtool_ops->get_ts_stats(dev, &data->stats); } ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); return ret; } static int tsinfo_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int len = 0; int ret; BUILD_BUG_ON(__SOF_TIMESTAMPING_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); if (ts_info->so_timestamping) { ret = ethnl_bitset32_size(&ts_info->so_timestamping, NULL, __SOF_TIMESTAMPING_CNT, sof_timestamping_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_TIMESTAMPING */ } if (ts_info->tx_types) { ret = ethnl_bitset32_size(&ts_info->tx_types, NULL, __HWTSTAMP_TX_CNT, ts_tx_type_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_TX_TYPES */ } if (ts_info->rx_filters) { ret = ethnl_bitset32_size(&ts_info->rx_filters, NULL, __HWTSTAMP_FILTER_CNT, ts_rx_filter_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_RX_FILTERS */ } if (ts_info->phc_index >= 0) { len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ /* _TSINFO_HWTSTAMP_PROVIDER */ len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32)); } if (ts_info->phc_source) { len += nla_total_size(sizeof(u32)); /* _TSINFO_HWTSTAMP_SOURCE */ if (ts_info->phc_phyindex) /* _TSINFO_HWTSTAMP_PHYINDEX */ len += nla_total_size(sizeof(u32)); } if (req_base->flags & ETHTOOL_FLAG_STATS) len += nla_total_size(0) + /* _TSINFO_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; return len; } static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) { if (val == ETHTOOL_STAT_NOT_SET) return 0; if (nla_put_uint(skb, attrtype, val)) return -EMSGSIZE; return 0; } static int tsinfo_put_stats(struct sk_buff *skb, const struct ethtool_ts_stats *stats) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS); if (!nest) return -EMSGSIZE; if (tsinfo_put_stat(skb, stats->tx_stats.pkts, ETHTOOL_A_TS_STAT_TX_PKTS) || tsinfo_put_stat(skb, stats->tx_stats.onestep_pkts_unconfirmed, ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED) || tsinfo_put_stat(skb, stats->tx_stats.lost, ETHTOOL_A_TS_STAT_TX_LOST) || tsinfo_put_stat(skb, stats->tx_stats.err, ETHTOOL_A_TS_STAT_TX_ERR)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int tsinfo_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int ret; if (ts_info->so_timestamping) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_TIMESTAMPING, &ts_info->so_timestamping, NULL, __SOF_TIMESTAMPING_CNT, sof_timestamping_names, compact); if (ret < 0) return ret; } if (ts_info->tx_types) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_TX_TYPES, &ts_info->tx_types, NULL, __HWTSTAMP_TX_CNT, ts_tx_type_names, compact); if (ret < 0) return ret; } if (ts_info->rx_filters) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_RX_FILTERS, &ts_info->rx_filters, NULL, __HWTSTAMP_FILTER_CNT, ts_rx_filter_names, compact); if (ret < 0) return ret; } if (ts_info->phc_index >= 0) { struct nlattr *nest; ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index); if (ret) return -EMSGSIZE; nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, ts_info->phc_index) || nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, ts_info->phc_qualifier)) { nla_nest_cancel(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest); } if (ts_info->phc_source) { if (nla_put_u32(skb, ETHTOOL_A_TSINFO_HWTSTAMP_SOURCE, ts_info->phc_source)) return -EMSGSIZE; if (ts_info->phc_phyindex && nla_put_u32(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PHYINDEX, ts_info->phc_phyindex)) return -EMSGSIZE; } if (req_base->flags & ETHTOOL_FLAG_STATS && tsinfo_put_stats(skb, &data->stats)) return -EMSGSIZE; return 0; } struct ethnl_tsinfo_dump_ctx { struct tsinfo_req_info *req_info; struct tsinfo_reply_data *reply_data; unsigned long pos_ifindex; bool netdev_dump_done; unsigned long pos_phyindex; enum hwtstamp_provider_qualifier pos_phcqualifier; }; static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, struct net_device *dev, struct tsinfo_reply_data *reply_data, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; void *ehdr = NULL; ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_TSINFO_GET_REPLY); if (!ehdr) return ERR_PTR(-EMSGSIZE); reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; reply_data->ts_info.phc_index = -1; return ehdr; } static int ethnl_tsinfo_end_dump(struct sk_buff *skb, struct net_device *dev, struct tsinfo_req_info *req_info, struct tsinfo_reply_data *reply_data, void *ehdr) { int ret; reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER); if (ret < 0) return ret; ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base); if (ret < 0) return ret; reply_data->base.dev = NULL; genlmsg_end(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb, struct net_device *dev, struct phy_device *phydev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; void *ehdr = NULL; int ret = 0; if (!phy_has_tsinfo(phydev)) return -EOPNOTSUPP; reply_data = ctx->reply_data; req_info = ctx->req_info; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); if (IS_ERR(ehdr)) return PTR_ERR(ehdr); ret = phy_ts_info(phydev, &reply_data->ts_info); if (ret < 0) goto err; if (reply_data->ts_info.phc_index >= 0) { reply_data->ts_info.phc_source = HWTSTAMP_SOURCE_PHYLIB; reply_data->ts_info.phc_phyindex = phydev->phyindex; } ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); if (ret < 0) goto err; return ret; err: genlmsg_cancel(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, struct net_device *dev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; const struct ethtool_ops *ops = dev->ethtool_ops; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; void *ehdr = NULL; int ret = 0; if (!ops->get_ts_info) return -EOPNOTSUPP; reply_data = ctx->reply_data; req_info = ctx->req_info; for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT; ctx->pos_phcqualifier++) { if (!net_support_hwtstamp_qualifier(dev, ctx->pos_phcqualifier)) continue; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); if (IS_ERR(ehdr)) { ret = PTR_ERR(ehdr); goto err; } reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; ret = ops->get_ts_info(dev, &reply_data->ts_info); if (ret < 0) goto err; if (reply_data->ts_info.phc_index >= 0) reply_data->ts_info.phc_source = HWTSTAMP_SOURCE_NETDEV; ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); if (ret < 0) goto err; } return ret; err: genlmsg_cancel(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb, struct net_device *dev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct phy_device_node *pdn; int ret = 0; if (!ctx->netdev_dump_done) { ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; ctx->netdev_dump_done = true; } if (!dev->link_topo) { if (phy_has_tsinfo(dev->phydev)) { ret = ethnl_tsinfo_dump_one_phydev(skb, dev, dev->phydev, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; } return 0; } xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn, ctx->pos_phyindex) { if (phy_has_tsinfo(pdn->phy)) { ret = ethnl_tsinfo_dump_one_phydev(skb, dev, pdn->phy, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; } } return ret; } int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct net_device *dev; int ret = 0; rtnl_lock(); if (ctx->req_info->base.dev) { dev = ctx->req_info->base.dev; netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->pos_ifindex) { netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); netdev_unlock_ops(dev); if (ret < 0 && ret != -EOPNOTSUPP) break; ctx->pos_phyindex = 0; ctx->netdev_dump_done = false; ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; } } rtnl_unlock(); return ret; } int ethnl_tsinfo_start(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct nlattr **tb = info->info.attrs; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; int ret; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); if (!req_info) return -ENOMEM; reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL); if (!reply_data) { ret = -ENOMEM; goto free_req_info; } ret = ethnl_parse_header_dev_get(&req_info->base, tb[ETHTOOL_A_TSINFO_HEADER], sock_net(cb->skb->sk), cb->extack, false); if (ret < 0) goto free_reply_data; ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; ctx->pos_phyindex = 0; ctx->netdev_dump_done = false; ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; return 0; free_reply_data: kfree(reply_data); free_req_info: kfree(req_info); return ret; } int ethnl_tsinfo_done(struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct tsinfo_req_info *req_info = ctx->req_info; ethnl_parse_header_dev_put(&req_info->base); kfree(ctx->reply_data); kfree(ctx->req_info); return 0; } const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .request_cmd = ETHTOOL_MSG_TSINFO_GET, .reply_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY, .hdr_attr = ETHTOOL_A_TSINFO_HEADER, .req_info_size = sizeof(struct tsinfo_req_info), .reply_data_size = sizeof(struct tsinfo_reply_data), .parse_request = tsinfo_parse_request, .prepare_data = tsinfo_prepare_data, .reply_size = tsinfo_reply_size, .fill_reply = tsinfo_fill_reply, }; |
| 30 10 10 13 30 5 31 10 22 33 1 33 32 33 1 12 22 33 43 22 43 11 2 12 33 33 33 10 10 11 4 7 7 2 2 7 14 1 9 4 11 2 3 10 7 3 7 3 7 3 8 2 8 2 10 10 15 1 11 11 9 9 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 | // SPDX-License-Identifier: GPL-2.0-only /* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF) * * Copyright (C) 2013 Terry Lam <vtlam@google.com> * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com> */ #include <linux/jiffies.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/siphash.h> #include <net/pkt_sched.h> #include <net/sock.h> /* Heavy-Hitter Filter (HHF) * * Principles : * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified * as heavy-hitter, it is immediately switched to the heavy-hitter bucket. * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler, * in which the heavy-hitter bucket is served with less weight. * In other words, non-heavy-hitters (e.g., short bursts of critical traffic) * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have * higher share of bandwidth. * * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the * following paper: * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and * Accounting", in ACM SIGCOMM, 2002. * * Conceptually, a multi-stage filter comprises k independent hash functions * and k counter arrays. Packets are indexed into k counter arrays by k hash * functions, respectively. The counters are then increased by the packet sizes. * Therefore, * - For a heavy-hitter flow: *all* of its k array counters must be large. * - For a non-heavy-hitter flow: some of its k array counters can be large * due to hash collision with other small flows; however, with high * probability, not *all* k counters are large. * * By the design of the multi-stage filter algorithm, the false negative rate * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is * susceptible to false positives (non-heavy-hitters mistakenly classified as * heavy-hitters). * Therefore, we also implement the following optimizations to reduce false * positives by avoiding unnecessary increment of the counter values: * - Optimization O1: once a heavy-hitter is identified, its bytes are not * accounted in the array counters. This technique is called "shielding" * in Section 3.3.1 of [EV02]. * - Optimization O2: conservative update of counters * (Section 3.3.2 of [EV02]), * New counter value = max {old counter value, * smallest counter value + packet bytes} * * Finally, we refresh the counters periodically since otherwise the counter * values will keep accumulating. * * Once a flow is classified as heavy-hitter, we also save its per-flow state * in an exact-matching flow table so that its subsequent packets can be * dispatched to the heavy-hitter bucket accordingly. * * * At a high level, this qdisc works as follows: * Given a packet p: * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter * bucket. * - Otherwise, forward p to the multi-stage filter, denoted filter F * + If F decides that p belongs to a non-heavy-hitter flow, then send p * to the non-heavy-hitter bucket. * + Otherwise, if F decides that p belongs to a new heavy-hitter flow, * then set up a new flow entry for the flow-id of p in the table T and * send p to the heavy-hitter bucket. * * In this implementation: * - T is a fixed-size hash-table with 1024 entries. Hash collision is * resolved by linked-list chaining. * - F has four counter arrays, each array containing 1024 32-bit counters. * That means 4 * 1024 * 32 bits = 16KB of memory. * - Since each array in F contains 1024 counters, 10 bits are sufficient to * index into each array. * Hence, instead of having four hash functions, we chop the 32-bit * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is * computed as XOR sum of those three chunks. * - We need to clear the counter arrays periodically; however, directly * memsetting 16KB of memory can lead to cache eviction and unwanted delay. * So by representing each counter by a valid bit, we only need to reset * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory. * - The Deficit Round Robin engine is taken from fq_codel implementation * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to * fq_codel_flow in fq_codel implementation. * */ /* Non-configurable parameters */ #define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */ #define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */ #define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */ #define HHF_BIT_MASK_LEN 10 /* masking 10 bits */ #define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */ #define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */ enum wdrr_bucket_idx { WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */ WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */ }; #define hhf_time_before(a, b) \ (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0)) /* Heavy-hitter per-flow state */ struct hh_flow_state { u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */ u32 hit_timestamp; /* last time heavy-hitter was seen */ struct list_head flowchain; /* chaining under hash collision */ }; /* Weighted Deficit Round Robin (WDRR) scheduler */ struct wdrr_bucket { struct sk_buff *head; struct sk_buff *tail; struct list_head bucketchain; int deficit; }; struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit */ struct list_head *hh_flows; /* table T (currently active HHs) */ u32 hh_flows_limit; /* max active HH allocs */ u32 hh_flows_overlimit; /* num of disallowed HH allocs */ u32 hh_flows_total_cnt; /* total admitted HHs */ u32 hh_flows_current_cnt; /* total current HHs */ u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */ u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays * was reset */ unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits * of hhf_arrays */ /* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */ struct list_head new_buckets; /* list of new buckets */ struct list_head old_buckets; /* list of old buckets */ /* Configurable HHF parameters */ u32 hhf_reset_timeout; /* interval to reset counter * arrays in filter F * (default 40ms) */ u32 hhf_admit_bytes; /* counter thresh to classify as * HH (default 128KB). * With these default values, * 128KB / 40ms = 25 Mbps * i.e., we expect to capture HHs * sending > 25 Mbps. */ u32 hhf_evict_timeout; /* aging threshold to evict idle * HHs out of table T. This should * be large enough to avoid * reordering during HH eviction. * (default 1s) */ u32 hhf_non_hh_weight; /* WDRR weight for non-HHs * (default 2, * i.e., non-HH : HH = 2 : 1) */ }; static u32 hhf_time_stamp(void) { return jiffies; } /* Looks up a heavy-hitter flow in a chaining list of table T. */ static struct hh_flow_state *seek_list(const u32 hash, struct list_head *head, struct hhf_sched_data *q) { struct hh_flow_state *flow, *next; u32 now = hhf_time_stamp(); if (list_empty(head)) return NULL; list_for_each_entry_safe(flow, next, head, flowchain) { u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; if (hhf_time_before(prev, now)) { /* Delete expired heavy-hitters, but preserve one entry * to avoid kzalloc() when next time this slot is hit. */ if (list_is_last(&flow->flowchain, head)) return NULL; list_del(&flow->flowchain); kfree(flow); q->hh_flows_current_cnt--; } else if (flow->hash_id == hash) { return flow; } } return NULL; } /* Returns a flow state entry for a new heavy-hitter. Either reuses an expired * entry or dynamically alloc a new entry. */ static struct hh_flow_state *alloc_new_hh(struct list_head *head, struct hhf_sched_data *q) { struct hh_flow_state *flow; u32 now = hhf_time_stamp(); if (!list_empty(head)) { /* Find an expired heavy-hitter flow entry. */ list_for_each_entry(flow, head, flowchain) { u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; if (hhf_time_before(prev, now)) return flow; } } if (q->hh_flows_current_cnt >= q->hh_flows_limit) { q->hh_flows_overlimit++; return NULL; } /* Create new entry. */ flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC); if (!flow) return NULL; q->hh_flows_current_cnt++; INIT_LIST_HEAD(&flow->flowchain); list_add_tail(&flow->flowchain, head); return flow; } /* Assigns packets to WDRR buckets. Implements a multi-stage filter to * classify heavy-hitters. */ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) { struct hhf_sched_data *q = qdisc_priv(sch); u32 tmp_hash, hash; u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos; struct hh_flow_state *flow; u32 pkt_len, min_hhf_val; int i; u32 prev; u32 now = hhf_time_stamp(); /* Reset the HHF counter arrays if this is the right time. */ prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout; if (hhf_time_before(prev, now)) { for (i = 0; i < HHF_ARRAYS_CNT; i++) bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN); q->hhf_arrays_reset_timestamp = now; } /* Get hashed flow-id of the skb. */ hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; flow = seek_list(hash, &q->hh_flows[flow_pos], q); if (flow) { /* found its HH flow */ flow->hit_timestamp = now; return WDRR_BUCKET_FOR_HH; } /* Now pass the packet through the multi-stage filter. */ tmp_hash = hash; xorsum = 0; for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) { /* Split the skb_hash into three 10-bit chunks. */ filter_pos[i] = tmp_hash & HHF_BIT_MASK; xorsum ^= filter_pos[i]; tmp_hash >>= HHF_BIT_MASK_LEN; } /* The last chunk is computed as XOR sum of other chunks. */ filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash; pkt_len = qdisc_pkt_len(skb); min_hhf_val = ~0U; for (i = 0; i < HHF_ARRAYS_CNT; i++) { u32 val; if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) { q->hhf_arrays[i][filter_pos[i]] = 0; __set_bit(filter_pos[i], q->hhf_valid_bits[i]); } val = q->hhf_arrays[i][filter_pos[i]] + pkt_len; if (min_hhf_val > val) min_hhf_val = val; } /* Found a new HH iff all counter values > HH admit threshold. */ if (min_hhf_val > q->hhf_admit_bytes) { /* Just captured a new heavy-hitter. */ flow = alloc_new_hh(&q->hh_flows[flow_pos], q); if (!flow) /* memory alloc problem */ return WDRR_BUCKET_FOR_NON_HH; flow->hash_id = hash; flow->hit_timestamp = now; q->hh_flows_total_cnt++; /* By returning without updating counters in q->hhf_arrays, * we implicitly implement "shielding" (see Optimization O1). */ return WDRR_BUCKET_FOR_HH; } /* Conservative update of HHF arrays (see Optimization O2). */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val) q->hhf_arrays[i][filter_pos[i]] = min_hhf_val; } return WDRR_BUCKET_FOR_NON_HH; } /* Removes one skb from head of bucket. */ static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket) { struct sk_buff *skb = bucket->head; bucket->head = skb->next; skb_mark_not_on_list(skb); return skb; } /* Tail-adds skb to bucket. */ static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb) { if (bucket->head == NULL) bucket->head = skb; else bucket->tail->next = skb; bucket->tail = skb; skb->next = NULL; } static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); struct wdrr_bucket *bucket; /* Always try to drop from heavy-hitters first. */ bucket = &q->buckets[WDRR_BUCKET_FOR_HH]; if (!bucket->head) bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH]; if (bucket->head) { struct sk_buff *skb = dequeue_head(bucket); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch, to_free); } /* Return id of the bucket from which the packet was dropped. */ return bucket - q->buckets; } static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); enum wdrr_bucket_idx idx; struct wdrr_bucket *bucket; unsigned int prev_backlog; idx = hhf_classify(skb, sch); bucket = &q->buckets[idx]; bucket_add(bucket, skb); qdisc_qstats_backlog_inc(sch, skb); if (list_empty(&bucket->bucketchain)) { unsigned int weight; /* The logic of new_buckets vs. old_buckets is the same as * new_flows vs. old_flows in the implementation of fq_codel, * i.e., short bursts of non-HHs should have strict priority. */ if (idx == WDRR_BUCKET_FOR_HH) { /* Always move heavy-hitters to old bucket. */ weight = 1; list_add_tail(&bucket->bucketchain, &q->old_buckets); } else { weight = q->hhf_non_hh_weight; list_add_tail(&bucket->bucketchain, &q->new_buckets); } bucket->deficit = weight * q->quantum; } if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet from this * bucket. */ if (hhf_drop(sch, to_free) == idx) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this. */ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } static struct sk_buff *hhf_dequeue(struct Qdisc *sch) { struct hhf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb = NULL; struct wdrr_bucket *bucket; struct list_head *head; begin: head = &q->new_buckets; if (list_empty(head)) { head = &q->old_buckets; if (list_empty(head)) return NULL; } bucket = list_first_entry(head, struct wdrr_bucket, bucketchain); if (bucket->deficit <= 0) { int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ? 1 : q->hhf_non_hh_weight; bucket->deficit += weight * q->quantum; list_move_tail(&bucket->bucketchain, &q->old_buckets); goto begin; } if (bucket->head) { skb = dequeue_head(bucket); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); } if (!skb) { /* Force a pass through old_buckets to prevent starvation. */ if ((head == &q->new_buckets) && !list_empty(&q->old_buckets)) list_move_tail(&bucket->bucketchain, &q->old_buckets); else list_del_init(&bucket->bucketchain); goto begin; } qdisc_bstats_update(sch, skb); bucket->deficit -= qdisc_pkt_len(skb); return skb; } static void hhf_reset(struct Qdisc *sch) { struct sk_buff *skb; while ((skb = hhf_dequeue(sch)) != NULL) rtnl_kfree_skbs(skb, skb); } static void hhf_destroy(struct Qdisc *sch) { int i; struct hhf_sched_data *q = qdisc_priv(sch); for (i = 0; i < HHF_ARRAYS_CNT; i++) { kvfree(q->hhf_arrays[i]); kvfree(q->hhf_valid_bits[i]); } if (!q->hh_flows) return; for (i = 0; i < HH_FLOWS_CNT; i++) { struct hh_flow_state *flow, *next; struct list_head *head = &q->hh_flows[i]; if (list_empty(head)) continue; list_for_each_entry_safe(flow, next, head, flowchain) { list_del(&flow->flowchain); kfree(flow); } } kvfree(q->hh_flows); } static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 }, [TCA_HHF_QUANTUM] = { .type = NLA_U32 }, [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 }, [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 }, [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 }, [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 }, [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, }; static int hhf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { unsigned int dropped_pkts = 0, dropped_bytes = 0; struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); if (err < 0) return err; if (tb[TCA_HHF_QUANTUM]) new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); if (tb[TCA_HHF_NON_HH_WEIGHT]) new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX) return -EINVAL; sch_tree_lock(sch); if (tb[TCA_HHF_BACKLOG_LIMIT]) WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT])); WRITE_ONCE(q->quantum, new_quantum); WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight); if (tb[TCA_HHF_HH_FLOWS_LIMIT]) WRITE_ONCE(q->hh_flows_limit, nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT])); if (tb[TCA_HHF_RESET_TIMEOUT]) { u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); WRITE_ONCE(q->hhf_reset_timeout, usecs_to_jiffies(us)); } if (tb[TCA_HHF_ADMIT_BYTES]) WRITE_ONCE(q->hhf_admit_bytes, nla_get_u32(tb[TCA_HHF_ADMIT_BYTES])); if (tb[TCA_HHF_EVICT_TIMEOUT]) { u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); WRITE_ONCE(q->hhf_evict_timeout, usecs_to_jiffies(us)); } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); if (!skb) break; dropped_pkts++; dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; } static int hhf_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct hhf_sched_data *q = qdisc_priv(sch); int i; sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); /* Configurable HHF parameters */ q->hhf_reset_timeout = HZ / 25; /* 40 ms */ q->hhf_admit_bytes = 131072; /* 128 KB */ q->hhf_evict_timeout = HZ; /* 1 sec */ q->hhf_non_hh_weight = 2; if (opt) { int err = hhf_change(sch, opt, extack); if (err) return err; } if (!q->hh_flows) { /* Initialize heavy-hitter flow table. */ q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head), GFP_KERNEL); if (!q->hh_flows) return -ENOMEM; for (i = 0; i < HH_FLOWS_CNT; i++) INIT_LIST_HEAD(&q->hh_flows[i]); /* Cap max active HHs at twice len of hh_flows table. */ q->hh_flows_limit = 2 * HH_FLOWS_CNT; q->hh_flows_overlimit = 0; q->hh_flows_total_cnt = 0; q->hh_flows_current_cnt = 0; /* Initialize heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN, sizeof(u32), GFP_KERNEL); if (!q->hhf_arrays[i]) { /* Note: hhf_destroy() will be called * by our caller. */ return -ENOMEM; } } q->hhf_arrays_reset_timestamp = hhf_time_stamp(); /* Initialize valid bits of heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / BITS_PER_BYTE, GFP_KERNEL); if (!q->hhf_valid_bits[i]) { /* Note: hhf_destroy() will be called * by our caller. */ return -ENOMEM; } } /* Initialize Weighted DRR buckets. */ for (i = 0; i < WDRR_BUCKET_CNT; i++) { struct wdrr_bucket *bucket = q->buckets + i; INIT_LIST_HEAD(&bucket->bucketchain); } } return 0; } static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) || nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, READ_ONCE(q->hh_flows_limit)) || nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) || nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, READ_ONCE(q->hhf_admit_bytes)) || nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) || nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, READ_ONCE(q->hhf_non_hh_weight))) goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: return -1; } static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct hhf_sched_data *q = qdisc_priv(sch); struct tc_hhf_xstats st = { .drop_overlimit = q->drop_overlimit, .hh_overlimit = q->hh_flows_overlimit, .hh_tot_count = q->hh_flows_total_cnt, .hh_cur_count = q->hh_flows_current_cnt, }; return gnet_stats_copy_app(d, &st, sizeof(st)); } static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .id = "hhf", .priv_size = sizeof(struct hhf_sched_data), .enqueue = hhf_enqueue, .dequeue = hhf_dequeue, .peek = qdisc_peek_dequeued, .init = hhf_init, .reset = hhf_reset, .destroy = hhf_destroy, .change = hhf_change, .dump = hhf_dump, .dump_stats = hhf_dump_stats, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("hhf"); static int __init hhf_module_init(void) { return register_qdisc(&hhf_qdisc_ops); } static void __exit hhf_module_exit(void) { unregister_qdisc(&hhf_qdisc_ops); } module_init(hhf_module_init) module_exit(hhf_module_exit) MODULE_AUTHOR("Terry Lam"); MODULE_AUTHOR("Nandita Dukkipati"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)"); |
| 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 | /* SPDX-License-Identifier: LGPL-2.1 */ /* * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) * */ #ifndef _CIFS_GLOB_H #define _CIFS_GLOB_H #include <linux/in.h> #include <linux/in6.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/mm.h> #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/sched/mm.h> #include <linux/netfs.h> #include "cifs_fs_sb.h" #include "cifsacl.h" #include <crypto/internal/hash.h> #include <uapi/linux/cifs/cifs_mount.h> #include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" #include <linux/filelock.h> #define SMB_PATH_MAX 260 #define CIFS_PORT 445 #define RFC1001_PORT 139 /* * The sizes of various internal tables and strings */ #define MAX_UID_INFO 16 #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 #define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1) #define CIFS_MIN_RCV_POOL 4 #define MAX_REOPEN_ATT 5 /* these many maximum attempts to reopen a file */ /* * default attribute cache timeout (jiffies) */ #define CIFS_DEF_ACTIMEO (1 * HZ) /* * max sleep time before retry to server */ #define CIFS_MAX_SLEEP 2000 /* * max attribute cache timeout (jiffies) - 2^30 */ #define CIFS_MAX_ACTIMEO (1 << 30) /* * Max persistent and resilient handle timeout (milliseconds). * Windows durable max was 960000 (16 minutes) */ #define SMB3_MAX_HANDLE_TIMEOUT 960000 /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurrently. */ #define CIFS_MAX_REQ 32767 #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) /* maximum length of ip addr as a string (including ipv6 and sctp) */ #define SERVER_NAME_LENGTH 80 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) /* echo interval in seconds */ #define SMB_ECHO_INTERVAL_MIN 1 #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 /* smb multichannel query server interfaces interval in seconds */ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 10 /* * Default number of credits to keep available for SMB3. * This value is chosen somewhat arbitrarily. The Windows client * defaults to 128 credits, the Windows server allows clients up to * 512 credits (or 8K for later versions), and the NetApp server * does not limit clients at all. Choose a high enough default value * such that the client shouldn't limit performance, but allow mount * to override (until you approach 64K, where we limit credits to 65000 * to reduce possibility of seeing more server credit overflow bugs. */ #define SMB2_MAX_CREDITS_AVAILABLE 32000 #include "cifspdu.h" #ifndef XATTR_DOS_ATTRIB #define XATTR_DOS_ATTRIB "user.DOSATTRIB" #endif #define CIFS_MAX_WORKSTATION_LEN (__NEW_UTS_LEN + 1) /* reasonable max for client */ #define CIFS_DFS_ROOT_SES(ses) ((ses)->dfs_root_ses ?: (ses)) /* * CIFS vfs client Status information (based on what we know.) */ /* associated with each connection */ enum statusEnum { CifsNew = 0, CifsGood, CifsExiting, CifsNeedReconnect, CifsNeedNegotiate, CifsInNegotiate, }; /* associated with each smb session */ enum ses_status_enum { SES_NEW = 0, SES_GOOD, SES_EXITING, SES_NEED_RECON, SES_IN_SETUP }; /* associated with each tree connection to the server */ enum tid_status_enum { TID_NEW = 0, TID_GOOD, TID_EXITING, TID_NEED_RECON, TID_NEED_TCON, TID_IN_TCON, TID_NEED_FILES_INVALIDATE, /* currently unused */ TID_IN_FILES_INVALIDATE }; enum securityEnum { Unspecified = 0, /* not specified */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ Kerberos, /* Kerberos via SPNEGO */ IAKerb, /* Kerberos proxy */ }; enum upcall_target_enum { UPTARGET_UNSPECIFIED, /* not specified, defaults to app */ UPTARGET_MOUNT, /* upcall to the mount namespace */ UPTARGET_APP, /* upcall to the application namespace which did the mount */ }; enum cifs_reparse_type { CIFS_REPARSE_TYPE_NONE, CIFS_REPARSE_TYPE_NFS, CIFS_REPARSE_TYPE_WSL, CIFS_REPARSE_TYPE_DEFAULT = CIFS_REPARSE_TYPE_NFS, }; static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type) { switch (type) { case CIFS_REPARSE_TYPE_NONE: return "none"; case CIFS_REPARSE_TYPE_NFS: return "nfs"; case CIFS_REPARSE_TYPE_WSL: return "wsl"; default: return "unknown"; } } enum cifs_symlink_type { CIFS_SYMLINK_TYPE_DEFAULT, CIFS_SYMLINK_TYPE_NONE, CIFS_SYMLINK_TYPE_NATIVE, CIFS_SYMLINK_TYPE_UNIX, CIFS_SYMLINK_TYPE_MFSYMLINKS, CIFS_SYMLINK_TYPE_SFU, CIFS_SYMLINK_TYPE_NFS, CIFS_SYMLINK_TYPE_WSL, }; static inline const char *cifs_symlink_type_str(enum cifs_symlink_type type) { switch (type) { case CIFS_SYMLINK_TYPE_NONE: return "none"; case CIFS_SYMLINK_TYPE_NATIVE: return "native"; case CIFS_SYMLINK_TYPE_UNIX: return "unix"; case CIFS_SYMLINK_TYPE_MFSYMLINKS: return "mfsymlinks"; case CIFS_SYMLINK_TYPE_SFU: return "sfu"; case CIFS_SYMLINK_TYPE_NFS: return "nfs"; case CIFS_SYMLINK_TYPE_WSL: return "wsl"; default: return "unknown"; } } struct session_key { unsigned int len; char *response; }; /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ struct crypto_aead *dec; /* smb3 decryption AEAD TFM (AES-CCM and AES-GCM) */ }; /* per smb session structure/fields */ struct ntlmssp_auth { bool sesskey_per_smbsess; /* whether session key is per smb session */ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */ }; struct cifs_cred { int uid; int gid; int mode; int cecount; struct smb_sid osid; struct smb_sid gsid; struct cifs_ntace *ntaces; struct smb_ace *aces; }; struct cifs_open_info_data { bool adjust_tz; bool reparse_point; bool contains_posix_file_info; struct { /* ioctl response buffer */ struct { int buftype; struct kvec iov; } io; __u32 tag; struct reparse_data_buffer *buf; } reparse; struct { __u8 eas[SMB2_WSL_MAX_QUERY_EA_RESP_SIZE]; unsigned int eas_len; } wsl; char *symlink_target; struct smb_sid posix_owner; struct smb_sid posix_group; union { struct smb2_file_all_info fi; struct smb311_posix_qinfo posix_fi; }; }; /* ***************************************************************** * Except the CIFS PDUs themselves all the * globally interesting structs should go here ***************************************************************** */ /* * A smb_rqst represents a complete request to be issued to a server. It's * formed by a kvec array, followed by an array of pages. Page data is assumed * to start at the beginning of the first page. */ struct smb_rqst { struct kvec *rq_iov; /* array of kvecs */ unsigned int rq_nvec; /* number of kvecs in array */ struct iov_iter rq_iter; /* Data iterator */ struct folio_queue *rq_buffer; /* Buffer for encryption */ }; struct mid_q_entry; struct TCP_Server_Info; struct cifsFileInfo; struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; struct smb3_fs_context; struct cifs_fid; struct cifs_io_subrequest; struct cifs_io_parms; struct cifs_search_info; struct cifsInodeInfo; struct cifs_open_parms; struct cifs_credits; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, struct smb_rqst *, struct mid_q_entry *); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ struct mid_q_entry *(*setup_request)(struct cifs_ses *, struct TCP_Server_Info *, struct smb_rqst *); /* setup async request: allocate mid, sign message */ struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, struct smb_rqst *); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); void (*add_credits)(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *, const int); unsigned int (*get_credits)(struct mid_q_entry *); __u64 (*get_next_mid)(struct TCP_Server_Info *); void (*revert_current_mid)(struct TCP_Server_Info *server, const unsigned int val); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* * Data length from read response message * When in_remaining is true, the returned data length is in * message field DataRemaining for out-of-band data read (e.g through * Memory Registration RDMA write in SMBD). * Otherwise, the returned data length is in message field DataLength. */ unsigned int (*read_data_length)(char *, bool in_remaining); /* map smb to linux error */ int (*map_error)(char *, bool); /* find mid corresponding to the response message */ struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *); void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info); void (*clear_stats)(struct cifs_tcon *); void (*print_stats)(struct seq_file *m, struct cifs_tcon *); void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); /* check if we need to negotiate */ bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server); /* set negotiated write size */ unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* set negotiated read size */ unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, struct TCP_Server_Info *server, const struct nls_table *); /* close smb session */ int (*logoff)(const unsigned int, struct cifs_ses *); /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *); /* query for server interfaces */ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *, bool); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); /* query path data from the server */ int (*query_path_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, struct cifs_open_info_data *data); /* query file data from the server */ int (*query_file_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, struct cifs_open_info_data *data); /* query reparse point to determine which type of special file */ int (*query_reparse_point)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u32 *tag, struct kvec *rsp, int *rsp_buftype); /* get server index number */ int (*get_srv_inum)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u64 *uniqueid, struct cifs_open_info_data *data); /* set size by path */ int (*set_path_size)(const unsigned int, struct cifs_tcon *, const char *, __u64, struct cifs_sb_info *, bool, struct dentry *); /* set size by file handle */ int (*set_file_size)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *, __u64, bool); /* set attributes */ int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, const unsigned int); int (*set_compression)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *); /* check if we can send an echo or nor */ bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ int (*echo)(struct TCP_Server_Info *); /* create directory */ int (*posix_mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *sb); /* set info on created directory */ void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, const unsigned int); /* remove directory */ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); /* unlink file */ int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct dentry *); /* open, rename and delete file */ int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); /* send rename request */ int (*rename)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* send create hardlink request */ int (*create_hardlink)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* query symlink target */ int (*query_symlink)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* async read from the server */ int (*async_readv)(struct cifs_io_subrequest *); /* async write to the server */ void (*async_writev)(struct cifs_io_subrequest *); /* sync read from the server */ int (*sync_read)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, char **, int *); /* sync write to the server */ int (*sync_write)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, struct kvec *, unsigned long); /* open dir, start readdir */ int (*query_dir_first)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct cifs_fid *, __u16, struct cifs_search_info *); /* continue readdir */ int (*query_dir_next)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, __u16, struct cifs_search_info *srch_inf); /* close dir */ int (*close_dir)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* calculate a size of SMB message */ unsigned int (*calc_smb_size)(void *buf); /* check for STATUS_PENDING and process the response if yes */ bool (*is_status_pending)(char *buf, struct TCP_Server_Info *server); /* check for STATUS_NETWORK_SESSION_EXPIRED */ bool (*is_session_expired)(char *); /* send oplock break response */ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cifs_inode); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); /* unlock range of mandatory locks */ int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, const unsigned int); /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); /* get lease key of the inode */ void (*get_lease_key)(struct inode *, struct cifs_fid *); /* set lease key of the inode */ void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); int (*notify)(const unsigned int xid, struct file *pfile, void __user *pbuf, bool return_changes); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); int (*create_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, bool *purge_cache); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock, u8 *parent_lease_key, __le32 le_flags); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, size_t, struct cifs_sb_info *); int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); struct smb_ntsd * (*get_acl)(struct cifs_sb_info *cifssb, struct inode *ino, const char *patch, u32 *plen, u32 info); struct smb_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *cifssmb, const struct cifs_fid *pfid, u32 *plen, u32 info); int (*set_acl)(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, int flag); /* writepages retry size */ unsigned int (*wp_retry_size)(struct inode *); /* get mtu credits */ int (*wait_mtu_credits)(struct TCP_Server_Info *, size_t, size_t *, struct cifs_credits *); /* adjust previously taken mtu credits to request size */ int (*adjust_credits)(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /*enum smb3_rw_credits_trace*/ trace); /* check if we need to issue closedir */ bool (*dir_needs_close)(struct cifsFileInfo *); long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, loff_t); /* init transform (compress/encrypt) request */ int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst, struct smb_rqst *, struct smb_rqst *); int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); int (*next_header)(struct TCP_Server_Info *server, char *buf, unsigned int *noff); /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ int (*make_node)(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t device_number); /* version specific fiemap implementation */ int (*fiemap)(struct cifs_tcon *tcon, struct cifsFileInfo *, struct fiemap_extent_info *, u64, u64); /* version specific llseek implementation */ loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int); /* Check for STATUS_IO_TIMEOUT */ bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov, u32 *plen); struct inode * (*create_reparse_inode)(struct cifs_open_info_data *data, struct super_block *sb, const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, bool directory, struct kvec *reparse_iov, struct kvec *xattr_iov); }; struct smb_version_values { char *version_string; __u16 protocol_id; __u32 req_capabilities; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; __u32 unlock_lock_type; size_t header_preamble_size; size_t header_size; size_t max_header_size; size_t read_rsp_size; __le16 lock_cmd; unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; unsigned int cap_unicode; __u16 signing_enabled; __u16 signing_required; size_t create_lease_size; }; #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) #define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) #define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when * trying to reuse existing superblock for a new mount */ #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \ CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \ CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \ CIFS_MOUNT_MAP_SFM_CHR | \ CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \ CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \ CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \ CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID | \ CIFS_MOUNT_UID_FROM_ACL | CIFS_MOUNT_NO_HANDLE_CACHE | \ CIFS_MOUNT_NO_DFS | CIFS_MOUNT_MODE_FROM_SID | \ CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE) /** * Generic VFS superblock mount flags (s_flags) to consider when * trying to reuse existing superblock for a new mount */ #define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \ SB_NODEV | SB_SYNCHRONOUS) struct cifs_mnt_data { struct cifs_sb_info *cifs_sb; struct smb3_fs_context *ctx; int flags; }; static inline unsigned int get_rfc1002_length(void *buf) { return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; struct list_head rlist; /* reconnect list */ spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ int rfc1001_sessinit; /* whether to estasblish netbios session */ bool with_rfc1001; /* if netbios session is used */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; struct smb_version_operations *ops; struct smb_version_values *vals; /* updates to tcpStatus protected by cifs_tcp_ses_lock */ enum statusEnum tcpStatus; /* what we think the status is */ char *hostname; /* hostname portion of UNC string */ struct socket *ssocket; struct sockaddr_storage dstaddr; struct sockaddr_storage srcaddr; /* locally bind to this IP */ #ifdef CONFIG_NET_NS struct net *net; #endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ spinlock_t mid_queue_lock; /* protect mid queue */ spinlock_t mid_counter_lock; struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; bool terminate; int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ spinlock_t req_lock; /* protect the two values above */ struct mutex _srv_mutex; unsigned int nofs_flag; struct task_struct *tsk; char server_GUID[16]; __u16 sec_mode; bool sign; /* is signing enabled on this connection? */ bool ignore_signature:1; /* skip validation of signatures in SMB2/3 rsp */ bool session_estab; /* mark when very first sess is established */ int echo_credits; /* echo reserved slots */ int oplock_credits; /* oplock break reserved slots */ bool echoes:1; /* enable echoes */ __u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */ u16 dialect; /* dialect index that server chose */ bool oplocks:1; /* enable oplocks */ unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ /* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */ unsigned int maxBuf; /* maxBuf specifies the maximum */ /* message size the server can send or receive for non-raw SMBs */ /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */ /* when socket is setup (and during reconnect) before NegProt sent */ unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 current_mid; /* multiplex id - rotating counter, protected by mid_counter_lock */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* for signing, protected by srv_mutex */ __u32 reconnect_instance; /* incremented on each reconnect */ __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ unsigned long neg_start; /* when negotiate started (jiffies) */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ char negflavor; /* NEGOTIATE response flavor */ /* extended security flavors that server supports */ bool sec_ntlmssp; /* supports NTLMSSP */ bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool sec_iakerb; /* supports pass-through auth for Kerberos (krb5 proxy) */ bool large_buf; /* is current buffer large? */ /* use SMBD connection instead of socket */ bool rdma; /* point to the SMBD connection if RDMA is used instead of socket */ struct smbd_connection *smbd_conn; struct delayed_work echo; /* echo ping workqueue job */ char *smallbuf; /* pointer to current "small" buffer */ char *bigbuf; /* pointer to current "big" buffer */ /* Total size of this PDU. Only valid from cifs_demultiplex_thread */ unsigned int pdu_size; unsigned int total_read; /* total amount of data read in this pass */ atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ #ifdef CONFIG_CIFS_STATS2 atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */ atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */ __u64 time_per_cmd[NUMBER_OF_SMB2_COMMANDS]; /* total time per cmd */ __u32 slowest_cmd[NUMBER_OF_SMB2_COMMANDS]; __u32 fastest_cmd[NUMBER_OF_SMB2_COMMANDS]; #endif /* STATS2 */ unsigned int max_read; unsigned int max_write; unsigned int min_offload; /* * If payload is less than or equal to the threshold, * use RDMA send/recv to send upper layer I/O. * If payload is more than the threshold, * use RDMA read/write through memory registration for I/O. */ unsigned int rdma_readwrite_threshold; unsigned int retrans; struct { bool requested; /* "compress" mount option set*/ bool enabled; /* actually negotiated with server */ __le16 alg; /* preferred alg negotiated with server */ } compression; __u16 signing_algorithm; __le16 cipher_type; /* save initial negprot hash */ __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; bool signing_negotiated; /* true if valid signing context rcvd from server */ bool posix_ext_supported; struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; /* * Number of targets available for reconnect. The more targets * the more tasks have to wait to let the demultiplex thread * reconnect. */ int nr_targets; bool noblockcnt; /* use non-blocking connect() */ /* * If this is a session channel, * primary_server holds the ref-counted * pointer to primary channel connection for the session. */ #define SERVER_IS_CHAN(server) (!!(server)->primary_server) struct TCP_Server_Info *primary_server; __u16 channel_sequence_num; /* incremented on primary channel on each chan reconnect */ #ifdef CONFIG_CIFS_SWN_UPCALL bool use_swn_dstaddr; struct sockaddr_storage swn_dstaddr; #endif /* * Canonical DFS referral path used in cifs_reconnect() for failover as * well as in DFS cache refresher. * * format: \\HOST\SHARE[\OPTIONAL PATH] */ char *leaf_fullpath; bool dfs_conn:1; char dns_dom[CIFS_MAX_DOMAINNAME_LEN + 1]; }; static inline bool is_smb1(struct TCP_Server_Info *server) { return HEADER_PREAMBLE_SIZE(server) != 0; } static inline void cifs_server_lock(struct TCP_Server_Info *server) { unsigned int nofs_flag = memalloc_nofs_save(); mutex_lock(&server->_srv_mutex); server->nofs_flag = nofs_flag; } static inline void cifs_server_unlock(struct TCP_Server_Info *server) { unsigned int nofs_flag = server->nofs_flag; mutex_unlock(&server->_srv_mutex); memalloc_nofs_restore(nofs_flag); } struct cifs_credits { unsigned int value; unsigned int instance; unsigned int in_flight_check; unsigned int rreq_debug_id; unsigned int rreq_debug_index; }; static inline unsigned int in_flight(struct TCP_Server_Info *server) { unsigned int num; spin_lock(&server->req_lock); num = server->in_flight; spin_unlock(&server->req_lock); return num; } static inline bool has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) { int num; spin_lock(&server->req_lock); num = *credits; spin_unlock(&server->req_lock); return num >= num_credits; } static inline void add_credits(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype) { server->ops->add_credits(server, credits, optype); } static inline void add_credits_and_wake_if(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype) { if (credits->value) { server->ops->add_credits(server, credits, optype); wake_up(&server->request_q); credits->value = 0; } } static inline void set_credits(struct TCP_Server_Info *server, const int val) { server->ops->set_credits(server, val); } static inline int adjust_credits(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /* enum smb3_rw_credits_trace */ trace) { return server->ops->adjust_credits ? server->ops->adjust_credits(server, subreq, trace) : 0; } static inline __le64 get_next_mid64(struct TCP_Server_Info *server) { return cpu_to_le64(server->ops->get_next_mid(server)); } static inline __le16 get_next_mid(struct TCP_Server_Info *server) { __u16 mid = server->ops->get_next_mid(server); /* * The value in the SMB header should be little endian for easy * on-the-wire decoding. */ return cpu_to_le16(mid); } static inline void revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { if (server->ops->revert_current_mid) server->ops->revert_current_mid(server, val); } static inline void revert_current_mid_from_hdr(struct TCP_Server_Info *server, const struct smb2_hdr *shdr) { unsigned int num = le16_to_cpu(shdr->CreditCharge); return revert_current_mid(server, num > 0 ? num : 1); } static inline __u16 get_mid(const struct smb_hdr *smb) { return le16_to_cpu(smb->Mid); } static inline bool compare_mid(__u16 mid, const struct smb_hdr *smb) { return mid == le16_to_cpu(smb->Mid); } /* * When the server supports very large reads and writes via POSIX extensions, * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not * including the RFC1001 length. * * Note that this might make for "interesting" allocation problems during * writeback however as we have to allocate an array of pointers for the * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096. * * For reads, there is a similar problem as we need to allocate an array * of kvecs to handle the receive, though that should only need to be done * once. */ #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) /* * When the server doesn't allow large posix writes, only allow a rsize/wsize * of 2^17-1 minus the size of the call header. That allows for a read or * write up to the maximum size described by RFC1002. */ #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to * those values when posix extensions aren't in force. In actuality here, we * use 65536 to allow for a write that is a multiple of 4k. Most servers seem * to be ok with the extra byte even though Windows doesn't send writes that * are that large. * * Citation: * * https://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx */ #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. */ #ifdef CONFIG_NET_NS static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return srv->net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { srv->net = net; } #else static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return &init_net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { } #endif struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; size_t weight_fulfilled; unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ struct sockaddr_storage sockaddr; }; /* release iface when last ref is dropped */ static inline void release_iface(struct kref *ref) { struct cifs_server_iface *iface = container_of(ref, struct cifs_server_iface, refcount); kfree(iface); } struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; struct cifs_server_iface *iface; /* interface in use */ __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) #define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) /* * Session structure. One of these for each uid session with a particular host */ struct cifs_ses { struct list_head smb_ses_list; struct list_head rlist; /* reconnect list */ struct list_head tcon_list; struct list_head dlist; /* dfs list */ struct cifs_tcon *tcon_ipc; spinlock_t ses_lock; /* protect anything here that is not protected */ struct mutex session_mutex; struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum ses_status_enum ses_status; /* updates protected by cifs_tcp_ses_lock */ unsigned int overrideSecFlg; /* if non-zero override global sec flags */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ __u64 Suid; /* remote smb uid */ kuid_t linux_uid; /* overriding owner of files on the mount */ kuid_t cred_uid; /* owner of credentials */ unsigned int capabilities; char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */ char *user_name; /* must not be null except during init of sess and after mount option parsing we fill it */ char *domainName; char *password; char *password2; /* When key rotation used, new password may be set before it expires */ char workstation_name[CIFS_MAX_WORKSTATION_LEN]; struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ enum securityEnum sectype; /* what security flavor was specified? */ enum upcall_target_enum upcall_target; /* what upcall target was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; bool expired_pwd; /* track if access denied or expired pwd so can know if need to update */ int unicode; unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; /* * Network interfaces available on the server this session is * connected to. * * Other channels can be opened by connecting and binding this * session to interfaces from this list. * * iface_lock should be taken when accessing any of these fields */ spinlock_t iface_lock; /* ========= begin: protected by iface_lock ======== */ struct list_head iface_list; size_t iface_count; unsigned long iface_last_update; /* jiffies */ /* ========= end: protected by iface_lock ======== */ spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 #define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ (!(ses)->chans_need_reconnect) #define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \ ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses)) #define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \ ((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses)) #define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \ test_bit((index), &(ses)->chans_need_reconnect) #define CIFS_CHAN_IN_RECONNECT(ses, index) \ ((ses)->chans[(index)].in_reconnect) struct cifs_chan chans[CIFS_MAX_CHANNELS]; size_t chan_count; size_t chan_max; atomic_t chan_seq; /* round robin state */ /* * chans_need_reconnect is a bitmap indicating which of the channels * under this smb session needs to be reconnected. * If not multichannel session, only one bit will be used. * * We will ask for sess and tcon reconnection only if all the * channels are marked for needing reconnection. This will * enable the sessions on top to continue to live till any * of the channels below are active. */ unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ struct cifs_ses *dfs_root_ses; struct nls_table *local_nls; char *dns_dom; /* FQDN of the domain */ }; static inline bool cap_unix(struct cifs_ses *ses) { return ses->server->vals->cap_unix & ses->capabilities; } /* * common struct for holding inode info when searching for or updating an * inode with new info */ #define CIFS_FATTR_JUNCTION 0x1 #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 #define CIFS_FATTR_INO_COLLISION 0x8 #define CIFS_FATTR_UNKNOWN_NLINK 0x10 #define CIFS_FATTR_FAKE_ROOT_INO 0x20 struct cifs_fattr { u32 cf_flags; u32 cf_cifsattrs; u64 cf_uniqueid; u64 cf_eof; u64 cf_bytes; u64 cf_createtime; kuid_t cf_uid; kgid_t cf_gid; umode_t cf_mode; dev_t cf_rdev; unsigned int cf_nlink; unsigned int cf_dtype; struct timespec64 cf_atime; struct timespec64 cf_mtime; struct timespec64 cf_ctime; u32 cf_cifstag; char *cf_symlink_target; }; /* * there is one of these for each connection to a resource on a particular * session */ struct cifs_tcon { struct list_head tcon_list; int debug_id; /* Debugging for tracing */ int tc_count; struct list_head rlist; /* reconnect list */ spinlock_t tc_lock; /* protect anything here that is not protected */ atomic_t num_local_opens; /* num of all opens including disconnected */ atomic_t num_remote_opens; /* num of all network opens on server */ struct list_head openFileList; spinlock_t open_file_lock; /* protects list above */ struct cifs_ses *ses; /* pointer to session associated with */ char tree_name[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ char *nativeFileSystem; char *password; /* for share-level security */ __u32 tid; /* The 4 byte tree id */ __u16 Flags; /* optional support bits */ enum tid_status_enum status; atomic_t num_smbs_sent; union { struct { atomic_t num_writes; atomic_t num_reads; atomic_t num_flushes; atomic_t num_oplock_brks; atomic_t num_opens; atomic_t num_closes; atomic_t num_deletes; atomic_t num_mkdirs; atomic_t num_posixopens; atomic_t num_posixmkdirs; atomic_t num_rmdirs; atomic_t num_renames; atomic_t num_t2renames; atomic_t num_ffirst; atomic_t num_fnext; atomic_t num_fclose; atomic_t num_hardlinks; atomic_t num_symlinks; atomic_t num_locks; atomic_t num_acl_get; atomic_t num_acl_set; } cifs_stats; struct { atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS]; atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS]; } smb2_stats; } stats; __u64 bytes_read; __u64 bytes_written; spinlock_t stat_lock; /* protects the two fields above */ time64_t stats_from_time; FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; bool ipc:1; /* set if connection to IPC$ share (always also pipe) */ bool pipe:1; /* set if connection to pipe share */ bool print:1; /* set if connection to printer share */ bool retry:1; bool nocase:1; bool nohandlecache:1; /* if strange server resource prob can turn off */ bool nodelete:1; bool seal:1; /* transport encryption for this mounted share */ bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ bool posix_extensions; /* if true SMB3.11 posix extensions enabled */ bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ bool need_reopen_files:1; /* need to reopen tcon file handles */ bool use_resilient:1; /* use resilient instead of durable handles */ bool use_persistent:1; /* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ bool dummy:1; /* dummy tcon used for reconnecting channels */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; __u32 vol_serial_number; __le64 vol_create_time; __u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */ __u32 handle_timeout; /* persistent and durable handle timeout in ms */ __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ __u32 max_chunks; __u32 max_bytes_chunk; __u32 max_bytes_copy; __u32 max_cached_dirs; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ bool fscache_acquired; /* T if we've tried acquiring a cookie */ struct fscache_volume *fscache; /* cookie for share */ struct mutex fscache_lock; /* Prevent regetting a cookie */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; struct list_head cifs_sb_list; spinlock_t sb_list_lock; #ifdef CONFIG_CIFS_DFS_UPCALL struct delayed_work dfs_cache_work; struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ char *origin_fullpath; /* canonical copy of smb3_fs_context::source */ }; /* * This is a refcounted and timestamped container for a tcon pointer. The * container holds a tcon reference. It is considered safe to free one of * these when the tl_count goes to 0. The tl_time is the time of the last * "get" on the container. */ struct tcon_link { struct rb_node tl_rbnode; kuid_t tl_uid; unsigned long tl_flags; #define TCON_LINK_MASTER 0 #define TCON_LINK_PENDING 1 #define TCON_LINK_IN_TREE 2 unsigned long tl_time; atomic_t tl_count; struct cifs_tcon *tl_tcon; }; extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst); static inline struct cifs_tcon * tlink_tcon(struct tcon_link *tlink) { return tlink->tl_tcon; } static inline struct tcon_link * cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) { return cifs_sb->master_tlink; } extern void cifs_put_tlink(struct tcon_link *tlink); static inline struct tcon_link * cifs_get_tlink(struct tcon_link *tlink) { if (tlink && !IS_ERR(tlink)) atomic_inc(&tlink->tl_count); return tlink; } /* This function is always expected to succeed */ extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); #define CIFS_OPLOCK_NO_CHANGE 0xfe struct cifs_pending_open { struct list_head olist; struct tcon_link *tlink; __u8 lease_key[16]; __u32 oplock; }; struct cifs_deferred_close { struct list_head dlist; struct tcon_link *tlink; __u16 netfid; __u64 persistent_fid; __u64 volatile_fid; }; /* * This info hangs off the cifsFileInfo structure, pointed to by llist. * This is used to track byte stream locks on the file */ struct cifsLockInfo { struct list_head llist; /* pointer to next cifsLockInfo */ struct list_head blist; /* pointer to locks blocked on this */ wait_queue_head_t block_q; __u64 offset; __u64 length; __u32 pid; __u16 type; __u16 flags; }; /* * One of these for each open instance of a file */ struct cifs_search_info { loff_t index_of_last_entry; __u16 entries_in_buffer; __u16 info_level; __u32 resume_key; char *ntwrk_buf_start; char *srch_entries_start; char *last_entry; const char *presume_name; unsigned int resume_name_len; bool endOfSearch:1; bool emptyDir:1; bool unicode:1; bool smallBuf:1; /* so we know which buf_release function to call */ }; #define ACL_NO_MODE ((umode_t)(-1)) struct cifs_open_parms { struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; int disposition; int desired_access; int create_options; const char *path; struct cifs_fid *fid; umode_t mode; bool reconnect:1; bool replay:1; /* indicates that this open is for a replay */ struct kvec *ea_cctx; __le32 lease_flags; }; struct cifs_fid { __u16 netfid; __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; __u8 create_guid[16]; __u32 access; struct cifs_pending_open *pending_open; __u16 epoch; #ifdef CONFIG_CIFS_DEBUG2 __u64 mid; #endif /* CIFS_DEBUG2 */ bool purge_cache; }; struct cifs_fid_locks { struct list_head llist; struct cifsFileInfo *cfile; /* fid that owns locks */ struct list_head locks; /* locks held by fid above */ }; struct cifsFileInfo { /* following two lists are protected by tcon->open_file_lock */ struct list_head tlist; /* pointer to next fid owned by tcon */ struct list_head flist; /* next fid (file instance) for this inode */ /* lock list below protected by cifsi->lock_sem */ struct cifs_fid_locks *llist; /* brlocks held by this fid */ kuid_t uid; /* allows finding which FileInfo structure */ __u32 pid; /* process id who opened file */ struct cifs_fid fid; /* file id from remote */ struct list_head rlist; /* reconnect list */ /* BB add lock scope info here if needed */ /* lock scope id (0 if none) */ struct dentry *dentry; struct tcon_link *tlink; unsigned int f_flags; bool invalidHandle:1; /* file closed via session abend */ bool swapfile:1; bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ bool offload:1; /* offload final part of _put to a wq */ __u16 oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; spinlock_t file_info_lock; /* protects four flag/count fields above */ struct mutex fh_mutex; /* prevents reopen race after dead ses*/ struct cifs_search_info srch_inf; struct work_struct oplock_break; /* work for oplock breaks */ struct work_struct put; /* work for the final part of _put */ struct work_struct serverclose; /* work for serverclose */ struct delayed_work deferred; bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ char *symlink_target; }; struct cifs_io_parms { __u16 netfid; __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ __u32 pid; __u64 offset; unsigned int length; struct cifs_tcon *tcon; struct TCP_Server_Info *server; }; struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; pid_t pid; }; /* asynchronous read support */ struct cifs_io_subrequest { union { struct netfs_io_subrequest subreq; struct netfs_io_request *rreq; struct cifs_io_request *req; }; ssize_t got_bytes; unsigned int xid; int result; bool have_xid; bool replay; struct kvec iov[2]; struct TCP_Server_Info *server; #ifdef CONFIG_CIFS_SMB_DIRECT struct smbdirect_mr_io *mr; #endif struct cifs_credits credits; }; /* * Take a reference on the file private data. Must be called with * cfile->file_info_lock held. */ static inline void cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) { ++cifs_file->count; } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr, bool offload); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); int cifs_file_flush(const unsigned int xid, struct inode *inode, struct cifsFileInfo *cfile); int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, const char *full_path, struct cifsFileInfo *open_file, loff_t size); #define CIFS_CACHE_READ_FLG 1 #define CIFS_CACHE_HANDLE_FLG 2 #define CIFS_CACHE_RH_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_WRITE_FLG 4 #define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG) #define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_READ(cinode) ((cinode->oplock & CIFS_CACHE_READ_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE)) #define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_WRITE(cinode) ((cinode->oplock & CIFS_CACHE_WRITE_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)) /* * One of these for each file inode */ struct cifsInodeInfo { struct netfs_inode netfs; /* Netfslib context and vfs inode */ bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ /* * NOTE: Some code paths call down_read(lock_sem) twice, so * we must always use cifs_down_write() instead of down_write() * for this semaphore to avoid deadlocks. */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ __u16 epoch; /* used to track lease state changes */ #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */ #define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */ #define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */ #define CIFS_INO_LOCK (5) /* lock bit for synchronization */ #define CIFS_INO_CLOSE_ON_LOCK (7) /* Not to defer the close when lock is set */ unsigned long flags; spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ struct list_head deferred_closes; /* list of deferred closes */ spinlock_t deferred_lock; /* protection on deferred list */ bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */ char *symlink_target; __u32 reparse_tag; }; static inline struct cifsInodeInfo * CIFS_I(struct inode *inode) { return container_of(inode, struct cifsInodeInfo, netfs.inode); } static inline struct cifs_sb_info * CIFS_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct cifs_sb_info * CIFS_FILE_SB(struct file *file) { return CIFS_SB(file_inode(file)->i_sb); } static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) return '/'; else return '\\'; } static inline void convert_delimiter(char *path, char delim) { char old_delim, *pos; if (delim == '/') old_delim = '\\'; else old_delim = '/'; pos = path; while ((pos = strchr(pos, old_delim))) *pos = delim; } #define cifs_stats_inc atomic_inc static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon, unsigned int bytes) { if (bytes) { spin_lock(&tcon->stat_lock); tcon->bytes_written += bytes; spin_unlock(&tcon->stat_lock); } } static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, unsigned int bytes) { spin_lock(&tcon->stat_lock); tcon->bytes_read += bytes; spin_unlock(&tcon->stat_lock); } /* * This is the prototype for the mid receive function. This function is for * receiving the rest of the SMB frame, starting with the WordCount (which is * just after the MID in struct smb_hdr). Note: * * - This will be called by cifsd, with no locks held. * - The mid will still be on the pending_mid_q. * - mid->resp_buf will point to the current buffer. * * Returns zero on a successful receive, or an error. The receive state in * the TCP_Server_Info will also be updated. */ typedef int (mid_receive_t)(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* * This is the prototype for the mid callback function. This is called once the * mid has been received off of the socket. When creating one, take special * care to avoid deadlocks. Things to bear in mind: * * - it will be called by cifsd, with no locks held * - the mid will be removed from any lists */ typedef void (mid_callback_t)(struct mid_q_entry *mid); /* * This is the protopyte for mid handle function. This is called once the mid * has been recognized after decryption of the message. */ typedef int (mid_handle_t)(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ struct kref refcount; struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u16 credits; /* number of credits consumed by this mid */ __u16 credits_received; /* number of credits from the response */ __u32 pid; /* process id */ __u32 sequence_number; /* for CIFS signing */ unsigned long when_alloc; /* when mid was created */ #ifdef CONFIG_CIFS_STATS2 unsigned long when_sent; /* time when smb send finished */ unsigned long when_received; /* when demux complete (taken off wire) */ #endif mid_receive_t *receive; /* call receive callback */ mid_callback_t *callback; /* call completion callback */ mid_handle_t *handle; /* call handle mid callback */ void *callback_data; /* general purpose pointer for callback */ struct task_struct *creator; void *resp_buf; /* pointer to received SMB header */ unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ int mid_rc; /* rc for MID_RC */ __le16 command; /* smb command code */ unsigned int optype; /* operation type */ spinlock_t mid_lock; bool wait_cancelled:1; /* Cancelled while waiting for response */ bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ bool multiEnd:1; /* both received */ bool decrypted:1; /* decrypted entry */ }; struct close_cancelled_open { struct cifs_fid fid; struct cifs_tcon *tcon; struct work_struct work; __u64 mid; __u16 cmd; }; /* Make code in transport.c a little cleaner by moving update of optional stats into function below */ static inline void cifs_in_send_inc(struct TCP_Server_Info *server) { atomic_inc(&server->in_send); } static inline void cifs_in_send_dec(struct TCP_Server_Info *server) { atomic_dec(&server->in_send); } static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) { atomic_inc(&server->num_waiters); } static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) { atomic_dec(&server->num_waiters); } #ifdef CONFIG_CIFS_STATS2 static inline void cifs_save_when_sent(struct mid_q_entry *mid) { mid->when_sent = jiffies; } #else static inline void cifs_save_when_sent(struct mid_q_entry *mid) { } #endif /* for pending dnotify requests */ struct dir_notify_req { struct list_head lhead; __le16 Pid; __le16 PidHigh; __u16 Mid; __u16 Tid; __u16 Uid; __u16 netfid; __u32 filter; /* CompletionFilter (for multishot) */ int multishot; struct file *pfile; }; struct dfs_info3_param { int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/ int path_consumed; int server_type; int ref_flag; char *path_name; char *node_name; int ttl; }; struct file_list { struct list_head list; struct cifsFileInfo *cfile; }; struct cifs_mount_ctx { struct cifs_sb_info *cifs_sb; struct smb3_fs_context *fs_ctx; unsigned int xid; struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; }; static inline void __free_dfs_info_param(struct dfs_info3_param *param) { kfree(param->path_name); kfree(param->node_name); } static inline void free_dfs_info_param(struct dfs_info3_param *param) { if (param) __free_dfs_info_param(param); } static inline void zfree_dfs_info_param(struct dfs_info3_param *param) { if (param) { __free_dfs_info_param(param); memset(param, 0, sizeof(*param)); } } static inline void free_dfs_info_array(struct dfs_info3_param *param, int number_of_items) { int i; if ((number_of_items == 0) || (param == NULL)) return; for (i = 0; i < number_of_items; i++) { kfree(param[i].path_name); kfree(param[i].node_name); } kfree(param); } static inline bool is_interrupt_error(int error) { switch (error) { case -EINTR: case -ERESTARTSYS: case -ERESTARTNOHAND: case -ERESTARTNOINTR: return true; } return false; } static inline bool is_retryable_error(int error) { if (is_interrupt_error(error) || error == -EAGAIN) return true; return false; } static inline bool is_replayable_error(int error) { if (error == -EAGAIN || error == -ECONNABORTED) return true; return false; } /* cifs_get_writable_file() flags */ enum cifs_writable_file_flags { FIND_WR_ANY = 0U, FIND_WR_FSUID_ONLY = (1U << 0), FIND_WR_WITH_DELETE = (1U << 1), FIND_WR_NO_PENDING_DELETE = (1U << 2), }; #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 #define MID_RESPONSE_RECEIVED 4 #define MID_RETRY_NEEDED 8 /* session closed while this request out */ #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 #define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ #define MID_RC 0x80 /* mid_rc contains custom rc */ /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 #define CIFS_LARGE_BUFFER 2 #define CIFS_IOVEC 4 /* array of response buffers */ /* Type of Request to SendReceive2 */ #define CIFS_BLOCKING_OP 1 /* operation can block */ #define CIFS_NON_BLOCKING 2 /* do not block waiting for credits */ #define CIFS_TIMEOUT_MASK 0x003 /* only one of above set in req */ #define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */ #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ #define CIFS_NO_RSP_BUF 0x040 /* no response buffer required */ /* Type of request operation */ #define CIFS_ECHO_OP 0x080 /* echo request */ #define CIFS_OBREAK_OP 0x0100 /* oplock break request */ #define CIFS_NEG_OP 0x0200 /* negotiate request */ #define CIFS_CP_CREATE_CLOSE_OP 0x0400 /* compound create+close request */ /* Lower bitmask values are reserved by others below. */ #define CIFS_SESS_OP 0x2000 /* session setup request */ #define CIFS_OP_MASK 0x2780 /* mask request type */ #define CIFS_HAS_CREDITS 0x0400 /* already has credits */ #define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ #define CIFS_NO_SRV_RSP 0x1000 /* there is no server response */ #define CIFS_COMPRESS_REQ 0x4000 /* compress request before sending */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 #define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 /* note that only one of the following can be set so the result of setting MUST flags more than once will be to require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL #define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else #define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ #define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) #define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* ***************************************************************** * All constants go here ***************************************************************** */ #define UID_HASH (16) /* * Note that ONE module should define _DECLARE_GLOBALS_HERE to cause the * following to be declared. */ /**************************************************************************** * Here are all the locks (spinlock, mutex, semaphore) in cifs.ko, arranged according * to the locking order. i.e. if two locks are to be held together, the lock that * appears higher in this list needs to be taken before the other. * * If you hold a lock that is lower in this list, and you need to take a higher lock * (or if you think that one of the functions that you're calling may need to), first * drop the lock you hold, pick up the higher lock, then the lower one. This will * ensure that locks are picked up only in one direction in the below table * (top to bottom). * * Also, if you expect a function to be called with a lock held, explicitly document * this in the comments on top of your function definition. * * And also, try to keep the critical sections (lock hold time) to be as minimal as * possible. Blocking / calling other functions with a lock held always increase * the risk of a possible deadlock. * * Following this rule will avoid unnecessary deadlocks, which can get really hard to * debug. Also, any new lock that you introduce, please add to this list in the correct * order. * * Please populate this list whenever you introduce new locks in your changes. Or in * case I've missed some existing locks. Please ensure that it's added in the list * based on the locking order expected. * * ===================================================================================== * Lock Protects Initialization fn * ===================================================================================== * vol_list_lock * vol_info->ctx_lock vol_info->ctx * cifs_sb_info->tlink_tree_lock cifs_sb_info->tlink_tree cifs_setup_cifs_sb * TCP_Server_Info-> TCP_Server_Info cifs_get_tcp_session * reconnect_mutex * TCP_Server_Info->srv_mutex TCP_Server_Info cifs_get_tcp_session * cifs_ses->session_mutex cifs_ses sesInfoAlloc * cifs_tcon->open_file_lock cifs_tcon->openFileList tconInfoAlloc * cifs_tcon->pending_opens * cifs_tcon->stat_lock cifs_tcon->bytes_read tconInfoAlloc * cifs_tcon->bytes_written * cifs_tcp_ses_lock cifs_tcp_ses_list sesInfoAlloc * GlobalMid_Lock GlobalMaxActiveXid init_cifs * GlobalCurrentXid * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session * mid_q_entry->deleted_from_q * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits * ->echo_credits * ->oplock_credits * ->reconnect_instance * cifs_ses->ses_lock (anything that is not protected by another lock and can change) * sesInfoAlloc * cifs_ses->iface_lock cifs_ses->iface_list sesInfoAlloc * ->iface_count * ->iface_last_update * cifs_ses->chan_lock cifs_ses->chans sesInfoAlloc * ->chans_need_reconnect * ->chans_in_reconnect * cifs_tcon->tc_lock (anything that is not protected by another lock and can change) * tcon_info_alloc * inode->i_rwsem, taken by fs/netfs/locking.c e.g. should be taken before cifsInodeInfo locks * cifsInodeInfo->open_file_lock cifsInodeInfo->openFileList cifs_alloc_inode * cifsInodeInfo->writers_lock cifsInodeInfo->writers cifsInodeInfo_alloc * cifsInodeInfo->lock_sem cifsInodeInfo->llist cifs_init_once * ->can_cache_brlcks * cifsInodeInfo->deferred_lock cifsInodeInfo->deferred_closes cifsInodeInfo_alloc * cached_fids->cfid_list_lock cifs_tcon->cfids->entries init_cached_dirs * cached_fid->fid_lock (anything that is not protected by another lock and can change) * init_cached_dir * cifsFileInfo->fh_mutex cifsFileInfo cifs_new_fileinfo * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled * mid_q_entry->mid_lock mid_q_entry->callback alloc_mid * smb2_mid_entry_alloc * (Any fields of mid_q_entry that will need protection) ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE #define GLOBAL_EXTERN #else #define GLOBAL_EXTERN extern #endif /* * the list of TCP_Server_Info structures, ie each of the sockets * connecting our client to a distinct server (ip address), is * chained together by cifs_tcp_ses_list. The list of all our SMB * sessions (and from that the tree connections) can be found * by iterating over cifs_tcp_ses_list */ extern struct list_head cifs_tcp_ses_list; /* * This lock protects the cifs_tcp_ses_list, the list of smb sessions per * tcp session, and the list of tcon's per smb session. It also protects * the reference counters for the server, smb session, and tcon. * generally the locks should be taken in order tcp_ses_lock before * tcon->open_file_lock and that before file->file_info_lock since the * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file */ extern spinlock_t cifs_tcp_ses_lock; /* * Global transaction id (XID) information */ extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* * Global counters, updated atomically */ extern atomic_t sesInfoAllocCount; extern atomic_t tconInfoAllocCount; extern atomic_t tcpSesNextId; extern atomic_t tcpSesAllocCount; extern atomic_t tcpSesReconnectCount; extern atomic_t tconInfoReconnectCount; /* Various Debug counters */ extern atomic_t buf_alloc_count; /* current number allocated */ extern atomic_t small_buf_alloc_count; #ifdef CONFIG_CIFS_STATS2 extern atomic_t total_buf_alloc_count; /* total allocated over all time */ extern atomic_t total_small_buf_alloc_count; extern unsigned int slow_rsp_threshold; /* number of secs before logging */ #endif /* Misc globals */ extern bool enable_oplocks; /* enable or disable oplocks */ extern bool lookupCacheEnabled; extern unsigned int global_secflags; /* if on, session setup sent with more secure ntlmssp2 challenge/resp */ extern unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ extern bool enable_gcm_256; /* allow optional negotiate of strongest signing (aes-gcm-256) */ extern bool require_gcm_256; /* require use of strongest signing (aes-gcm-256) */ extern bool enable_negotiate_signing; /* request use of faster (GMAC) signing if available */ extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ extern unsigned int CIFSMaxBufSize; /* max size not including hdr */ extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ extern unsigned int cifs_min_small; /* min size of small buf pool */ extern unsigned int cifs_max_pending; /* MAX requests at once to server*/ extern unsigned int dir_cache_timeout; /* max time for directory lease caching of dir */ extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */ extern atomic_t mid_count; void cifs_oplock_break(struct work_struct *work); void cifs_queue_oplock_break(struct cifsFileInfo *cfile); void smb2_deferred_work_close(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; extern struct workqueue_struct *serverclose_wq; extern struct workqueue_struct *cfid_put_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; /* Operations for different SMB versions */ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; extern struct smb_version_values smbdefault_values; extern struct smb_version_values smb3any_values; extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; extern struct smb_version_operations smb311_operations; extern struct smb_version_values smb311_values; static inline char *get_security_type_str(enum securityEnum sectype) { switch (sectype) { case RawNTLMSSP: return "RawNTLMSSP"; case Kerberos: return "Kerberos"; case NTLMv2: return "NTLMv2"; case IAKerb: return "IAKerb"; default: return "Unknown"; } } static inline bool is_smb1_server(struct TCP_Server_Info *server) { return strcmp(server->vals->version_string, SMB1_VERSION_STRING) == 0; } static inline bool is_tcon_dfs(struct cifs_tcon *tcon) { /* * For SMB1, see MS-CIFS 2.4.55 SMB_COM_TREE_CONNECT_ANDX (0x75) and MS-CIFS 3.3.4.4 DFS * Subsystem Notifies That a Share Is a DFS Share. * * For SMB2+, see MS-SMB2 2.2.10 SMB2 TREE_CONNECT Response and MS-SMB2 3.3.4.14 Server * Application Updates a Share. */ if (!tcon || !tcon->ses || !tcon->ses->server) return false; return is_smb1_server(tcon->ses->server) ? tcon->Flags & SMB_SHARE_IS_IN_DFS : tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT); } static inline bool cifs_is_referral_server(struct cifs_tcon *tcon, const struct dfs_info3_param *ref) { /* * Check if all targets are capable of handling DFS referrals as per * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL. */ return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER)); } static inline u64 cifs_flock_len(const struct file_lock *fl) { return (u64)fl->fl_end - fl->fl_start + 1; } static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses) { if (WARN_ON_ONCE(!ses || !ses->server)) return 0; /* * Make workstation name no more than 15 chars when using insecure dialects as some legacy * servers do require it during NTLMSSP. */ if (ses->server->dialect <= SMB20_PROT_ID) return min_t(size_t, sizeof(ses->workstation_name), RFC1001_NAME_LEN_WITH_NULL); return sizeof(ses->workstation_name); } static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const FILE_ALL_INFO *src) { memcpy(dst, src, (size_t)((u8 *)&src->EASize - (u8 *)src)); dst->IndexNumber = 0; dst->EASize = src->EASize; dst->AccessFlags = 0; dst->CurrentByteOffset = 0; dst->Mode = 0; dst->AlignmentRequirement = 0; dst->FileNameLength = src->FileNameLength; } static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, int num_rqst, const u8 *sig) { unsigned int len, skip; unsigned int nents = 0; unsigned long addr; size_t data_size; int i, j; /* * The first rqst has a transform header where the first 20 bytes are * not part of the encrypted blob. */ skip = 20; /* Assumes the first rqst has a transform header as the first iov. * I.e. * rqst[0].rq_iov[0] is transform header * rqst[0].rq_iov[1+] data to be encrypted/decrypted * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { data_size = iov_iter_count(&rqst[i].rq_iter); /* We really don't want a mixture of pinned and unpinned pages * in the sglist. It's hard to keep track of which is what. * Instead, we convert to a BVEC-type iterator higher up. */ if (data_size && WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) return -EIO; /* We also don't want to have any extra refs or pins to clean * up in the sglist. */ if (data_size && WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) return -EIO; for (j = 0; j < rqst[i].rq_nvec; j++) { struct kvec *iov = &rqst[i].rq_iov[j]; addr = (unsigned long)iov->iov_base + skip; if (is_vmalloc_or_module_addr((void *)addr)) { len = iov->iov_len - skip; nents += DIV_ROUND_UP(offset_in_page(addr) + len, PAGE_SIZE); } else { nents++; } skip = 0; } if (data_size) nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; } /* We can not use the normal sg_set_buf() as we will sometimes pass a * stack object as buf. */ static inline void cifs_sg_set_buf(struct sg_table *sgtable, const void *buf, unsigned int buflen) { unsigned long addr = (unsigned long)buf; unsigned int off = offset_in_page(addr); addr &= PAGE_MASK; if (is_vmalloc_or_module_addr((void *)addr)) { do { unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); sg_set_page(&sgtable->sgl[sgtable->nents++], vmalloc_to_page((void *)addr), len, off); off = 0; addr += PAGE_SIZE; buflen -= len; } while (buflen); } else { sg_set_page(&sgtable->sgl[sgtable->nents++], virt_to_page((void *)addr), buflen, off); } } #define CIFS_OPARMS(_cifs_sb, _tcon, _path, _da, _cd, _co, _mode) \ ((struct cifs_open_parms) { \ .tcon = _tcon, \ .path = _path, \ .desired_access = (_da), \ .disposition = (_cd), \ .create_options = cifs_create_options(_cifs_sb, (_co)), \ .mode = (_mode), \ .cifs_sb = _cifs_sb, \ }) struct smb2_compound_vars { struct cifs_open_parms oparms; struct kvec rsp_iov[MAX_COMPOUND]; struct smb_rqst rqst[MAX_COMPOUND]; struct kvec open_iov[SMB2_CREATE_IOV_SIZE]; struct kvec qi_iov; struct kvec io_iov[SMB2_IOCTL_IOV_SIZE]; struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec close_iov; struct smb2_file_rename_info_hdr rename_info; struct smb2_file_link_info_hdr link_info; struct kvec ea_iov; }; static inline bool cifs_ses_exiting(struct cifs_ses *ses) { bool ret; spin_lock(&ses->ses_lock); ret = ses->ses_status == SES_EXITING; spin_unlock(&ses->ses_lock); return ret; } static inline bool cifs_netbios_name(const char *name, size_t namelen) { bool ret = false; size_t i; if (namelen >= 1 && namelen <= RFC1001_NAME_LEN) { for (i = 0; i < namelen; i++) { const unsigned char c = name[i]; if (c == '\\' || c == '/' || c == ':' || c == '*' || c == '?' || c == '"' || c == '<' || c == '>' || c == '|' || c == '.') return false; if (!ret && isalpha(c)) ret = true; } } return ret; } /* * Execute mid callback atomically - ensures callback runs exactly once * and prevents sleeping in atomic context. */ static inline void mid_execute_callback(struct mid_q_entry *mid) { void (*callback)(struct mid_q_entry *mid); spin_lock(&mid->mid_lock); callback = mid->callback; mid->callback = NULL; /* Mark as executed, */ spin_unlock(&mid->mid_lock); if (callback) callback(mid); } #define CIFS_REPARSE_SUPPORT(tcon) \ ((tcon)->posix_extensions || \ (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ FILE_SUPPORTS_REPARSE_POINTS)) #endif /* _CIFS_GLOB_H */ |
| 2 1 4 4 1 2 4 1 6 6 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | // SPDX-License-Identifier: GPL-2.0-or-later /* * PCBC: Propagating Cipher Block Chaining mode * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Derived from cbc.c * - Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/algapi.h> #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); const u8 *src = walk->src.virt.addr; unsigned int nbytes = walk->nbytes; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; do { crypto_xor(iv, src, bsize); crypto_cipher_encrypt_one(tfm, dst, iv); crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; u8 tmpbuf[MAX_CIPHER_BLOCKSIZE]; do { memcpy(tmpbuf, dst, bsize); crypto_xor(iv, dst, bsize); crypto_cipher_encrypt_one(tfm, dst, iv); crypto_xor_cpy(iv, tmpbuf, dst, bsize); dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { if (walk.src.virt.addr == walk.dst.virt.addr) nbytes = crypto_pcbc_encrypt_inplace(req, &walk, cipher); else nbytes = crypto_pcbc_encrypt_segment(req, &walk, cipher); err = skcipher_walk_done(&walk, nbytes); } return err; } static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); const u8 *src = walk->src.virt.addr; unsigned int nbytes = walk->nbytes; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; do { crypto_cipher_decrypt_one(tfm, dst, src); crypto_xor(dst, iv, bsize); crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(u32)); do { memcpy(tmpbuf, dst, bsize); crypto_cipher_decrypt_one(tfm, dst, dst); crypto_xor(dst, iv, bsize); crypto_xor_cpy(iv, dst, tmpbuf, bsize); dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { if (walk.src.virt.addr == walk.dst.virt.addr) nbytes = crypto_pcbc_decrypt_inplace(req, &walk, cipher); else nbytes = crypto_pcbc_decrypt_segment(req, &walk, cipher); err = skcipher_walk_done(&walk, nbytes); } return err; } static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_instance *inst; int err; inst = skcipher_alloc_instance_simple(tmpl, tb); if (IS_ERR(inst)) return PTR_ERR(inst); inst->alg.encrypt = crypto_pcbc_encrypt; inst->alg.decrypt = crypto_pcbc_decrypt; err = skcipher_register_instance(tmpl, inst); if (err) inst->free(inst); return err; } static struct crypto_template crypto_pcbc_tmpl = { .name = "pcbc", .create = crypto_pcbc_create, .module = THIS_MODULE, }; static int __init crypto_pcbc_module_init(void) { return crypto_register_template(&crypto_pcbc_tmpl); } static void __exit crypto_pcbc_module_exit(void) { crypto_unregister_template(&crypto_pcbc_tmpl); } module_init(crypto_pcbc_module_init); module_exit(crypto_pcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PCBC block cipher mode of operation"); MODULE_ALIAS_CRYPTO("pcbc"); MODULE_IMPORT_NS("CRYPTO_INTERNAL"); |
| 14 14 14 14 14 12 14 4 10 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Randomness driver for virtio * Copyright (C) 2007, 2008 Rusty Russell IBM Corporation */ #include <asm/barrier.h> #include <linux/err.h> #include <linux/hw_random.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> #include <linux/virtio.h> #include <linux/virtio_rng.h> #include <linux/module.h> #include <linux/slab.h> static DEFINE_IDA(rng_index_ida); struct virtrng_info { struct hwrng hwrng; struct virtqueue *vq; char name[25]; int index; bool hwrng_register_done; bool hwrng_removed; /* data transfer */ struct completion have_data; unsigned int data_avail; unsigned int data_idx; /* minimal size returned by rng_buffer_size() */ #if SMP_CACHE_BYTES < 32 u8 data[32]; #else u8 data[SMP_CACHE_BYTES]; #endif }; static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; unsigned int len; /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ if (!virtqueue_get_buf(vi->vq, &len)) return; smp_store_release(&vi->data_avail, len); complete(&vi->have_data); } static void request_entropy(struct virtrng_info *vi) { struct scatterlist sg; reinit_completion(&vi->have_data); vi->data_idx = 0; sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); virtqueue_kick(vi->vq); } static unsigned int copy_data(struct virtrng_info *vi, void *buf, unsigned int size) { size = min_t(unsigned int, size, vi->data_avail); memcpy(buf, vi->data + vi->data_idx, size); vi->data_idx += size; vi->data_avail -= size; if (vi->data_avail == 0) request_entropy(vi); return size; } static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; unsigned int chunk; size_t read; if (vi->hwrng_removed) return -ENODEV; read = 0; /* copy available data */ if (smp_load_acquire(&vi->data_avail)) { chunk = copy_data(vi, buf, size); size -= chunk; read += chunk; } if (!wait) return read; /* We have already copied available entropy, * so either size is 0 or data_avail is 0 */ while (size != 0) { /* data_avail is 0 but a request is pending */ ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; /* if vi->data_avail is 0, we have been interrupted * by a cleanup, but buffer stays in the queue */ if (vi->data_avail == 0) return read; chunk = copy_data(vi, buf + read, size); size -= chunk; read += chunk; } return read; } static void virtio_cleanup(struct hwrng *rng) { struct virtrng_info *vi = (struct virtrng_info *)rng->priv; complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) { int err, index; struct virtrng_info *vi = NULL; vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); if (!vi) return -ENOMEM; vi->index = index = ida_alloc(&rng_index_ida, GFP_KERNEL); if (index < 0) { err = index; goto err_ida; } sprintf(vi->name, "virtio_rng.%d", index); init_completion(&vi->have_data); vi->hwrng = (struct hwrng) { .read = virtio_read, .cleanup = virtio_cleanup, .priv = (unsigned long)vi, .name = vi->name, }; vdev->priv = vi; /* We expect a single virtqueue. */ vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input"); if (IS_ERR(vi->vq)) { err = PTR_ERR(vi->vq); goto err_find; } virtio_device_ready(vdev); /* we always have a pending entropy request */ request_entropy(vi); return 0; err_find: ida_free(&rng_index_ida, index); err_ida: kfree(vi); return err; } static void remove_common(struct virtio_device *vdev) { struct virtrng_info *vi = vdev->priv; vi->hwrng_removed = true; vi->data_avail = 0; vi->data_idx = 0; complete(&vi->have_data); if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); virtio_reset_device(vdev); vdev->config->del_vqs(vdev); ida_free(&rng_index_ida, vi->index); kfree(vi); } static int virtrng_probe(struct virtio_device *vdev) { return probe_common(vdev); } static void virtrng_remove(struct virtio_device *vdev) { remove_common(vdev); } static void virtrng_scan(struct virtio_device *vdev) { struct virtrng_info *vi = vdev->priv; int err; err = hwrng_register(&vi->hwrng); if (!err) vi->hwrng_register_done = true; } static int virtrng_freeze(struct virtio_device *vdev) { remove_common(vdev); return 0; } static int virtrng_restore(struct virtio_device *vdev) { int err; err = probe_common(vdev); if (!err) { struct virtrng_info *vi = vdev->priv; /* * Set hwrng_removed to ensure that virtio_read() * does not block waiting for data before the * registration is complete. */ vi->hwrng_removed = true; err = hwrng_register(&vi->hwrng); if (!err) { vi->hwrng_register_done = true; vi->hwrng_removed = false; } } return err; } static const struct virtio_device_id id_table[] = { { VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID }, { 0 }, }; static struct virtio_driver virtio_rng_driver = { .driver.name = KBUILD_MODNAME, .id_table = id_table, .probe = virtrng_probe, .remove = virtrng_remove, .scan = virtrng_scan, .freeze = pm_sleep_ptr(virtrng_freeze), .restore = pm_sleep_ptr(virtrng_restore), }; module_virtio_driver(virtio_rng_driver); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio random number driver"); MODULE_LICENSE("GPL"); |
| 2 111 4 2 106 97 97 76 74 105 90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 | // SPDX-License-Identifier: GPL-2.0-or-later /* Diffie-Hellman Key Agreement Method [RFC2631] * * Copyright (c) 2016, Intel Corporation * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com> */ #include <linux/fips.h> #include <linux/module.h> #include <crypto/internal/kpp.h> #include <crypto/kpp.h> #include <crypto/dh.h> #include <crypto/rng.h> #include <linux/mpi.h> struct dh_ctx { MPI p; /* Value is guaranteed to be set. */ MPI g; /* Value is guaranteed to be set. */ MPI xa; /* Value is guaranteed to be set. */ }; static void dh_clear_ctx(struct dh_ctx *ctx) { mpi_free(ctx->p); mpi_free(ctx->g); mpi_free(ctx->xa); memset(ctx, 0, sizeof(*ctx)); } /* * If base is g we compute the public key * ya = g^xa mod p; [RFC2631 sec 2.1.1] * else if base if the counterpart public key we compute the shared secret * ZZ = yb^xa mod p; [RFC2631 sec 2.1.1] */ static int _compute_val(const struct dh_ctx *ctx, MPI base, MPI val) { /* val = base^xa mod p */ return mpi_powm(val, base, ctx->xa, ctx->p); } static inline struct dh_ctx *dh_get_ctx(struct crypto_kpp *tfm) { return kpp_tfm_ctx(tfm); } static int dh_check_params_length(unsigned int p_len) { if (fips_enabled) return (p_len < 2048) ? -EINVAL : 0; return (p_len < 1536) ? -EINVAL : 0; } static int dh_set_params(struct dh_ctx *ctx, struct dh *params) { if (dh_check_params_length(params->p_size << 3)) return -EINVAL; ctx->p = mpi_read_raw_data(params->p, params->p_size); if (!ctx->p) return -EINVAL; ctx->g = mpi_read_raw_data(params->g, params->g_size); if (!ctx->g) return -EINVAL; return 0; } static int dh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct dh_ctx *ctx = dh_get_ctx(tfm); struct dh params; /* Free the old MPI key if any */ dh_clear_ctx(ctx); if (crypto_dh_decode_key(buf, len, ¶ms) < 0) goto err_clear_ctx; if (dh_set_params(ctx, ¶ms) < 0) goto err_clear_ctx; ctx->xa = mpi_read_raw_data(params.key, params.key_size); if (!ctx->xa) goto err_clear_ctx; return 0; err_clear_ctx: dh_clear_ctx(ctx); return -EINVAL; } /* * SP800-56A public key verification: * * * For the safe-prime groups in FIPS mode, Q can be computed * trivially from P and a full validation according to SP800-56A * section 5.6.2.3.1 is performed. * * * For all other sets of group parameters, only a partial validation * according to SP800-56A section 5.6.2.3.2 is performed. */ static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y) { MPI val, q; int ret; if (!fips_enabled) return 0; if (unlikely(!ctx->p)) return -EINVAL; /* * Step 1: Verify that 2 <= y <= p - 2. * * The upper limit check is actually y < p instead of y < p - 1 * in order to save one mpi_sub_ui() invocation here. Note that * p - 1 is the non-trivial element of the subgroup of order 2 and * thus, the check on y^q below would fail if y == p - 1. */ if (mpi_cmp_ui(y, 1) < 1 || mpi_cmp(y, ctx->p) >= 0) return -EINVAL; /* * Step 2: Verify that 1 = y^q mod p * * For the safe-prime groups q = (p - 1)/2. */ val = mpi_alloc(0); if (!val) return -ENOMEM; q = mpi_alloc(mpi_get_nlimbs(ctx->p)); if (!q) { mpi_free(val); return -ENOMEM; } /* * ->p is odd, so no need to explicitly subtract one * from it before shifting to the right. */ ret = mpi_rshift(q, ctx->p, 1) ?: mpi_powm(val, y, q, ctx->p); mpi_free(q); if (ret) { mpi_free(val); return ret; } ret = mpi_cmp_ui(val, 1); mpi_free(val); if (ret != 0) return -EINVAL; return 0; } static int dh_compute_value(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct dh_ctx *ctx = dh_get_ctx(tfm); MPI base, val = mpi_alloc(0); int ret = 0; int sign; if (!val) return -ENOMEM; if (unlikely(!ctx->xa)) { ret = -EINVAL; goto err_free_val; } if (req->src) { base = mpi_read_raw_from_sgl(req->src, req->src_len); if (!base) { ret = -EINVAL; goto err_free_val; } ret = dh_is_pubkey_valid(ctx, base); if (ret) goto err_free_base; } else { base = ctx->g; } ret = _compute_val(ctx, base, val); if (ret) goto err_free_base; if (fips_enabled) { /* SP800-56A rev3 5.7.1.1 check: Validation of shared secret */ if (req->src) { MPI pone; /* z <= 1 */ if (mpi_cmp_ui(val, 1) < 1) { ret = -EBADMSG; goto err_free_base; } /* z == p - 1 */ pone = mpi_alloc(0); if (!pone) { ret = -ENOMEM; goto err_free_base; } ret = mpi_sub_ui(pone, ctx->p, 1); if (!ret && !mpi_cmp(pone, val)) ret = -EBADMSG; mpi_free(pone); if (ret) goto err_free_base; /* SP800-56A rev 3 5.6.2.1.3 key check */ } else { if (dh_is_pubkey_valid(ctx, val)) { ret = -EAGAIN; goto err_free_val; } } } ret = mpi_write_to_sgl(val, req->dst, req->dst_len, &sign); if (ret) goto err_free_base; if (sign < 0) ret = -EBADMSG; err_free_base: if (req->src) mpi_free(base); err_free_val: mpi_free(val); return ret; } static unsigned int dh_max_size(struct crypto_kpp *tfm) { struct dh_ctx *ctx = dh_get_ctx(tfm); return mpi_get_size(ctx->p); } static void dh_exit_tfm(struct crypto_kpp *tfm) { struct dh_ctx *ctx = dh_get_ctx(tfm); dh_clear_ctx(ctx); } static struct kpp_alg dh = { .set_secret = dh_set_secret, .generate_public_key = dh_compute_value, .compute_shared_secret = dh_compute_value, .max_size = dh_max_size, .exit = dh_exit_tfm, .base = { .cra_name = "dh", .cra_driver_name = "dh-generic", .cra_priority = 100, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct dh_ctx), }, }; struct dh_safe_prime { unsigned int max_strength; unsigned int p_size; const char *p; }; static const char safe_prime_g[] = { 2 }; struct dh_safe_prime_instance_ctx { struct crypto_kpp_spawn dh_spawn; const struct dh_safe_prime *safe_prime; }; struct dh_safe_prime_tfm_ctx { struct crypto_kpp *dh_tfm; }; static void dh_safe_prime_free_instance(struct kpp_instance *inst) { struct dh_safe_prime_instance_ctx *ctx = kpp_instance_ctx(inst); crypto_drop_kpp(&ctx->dh_spawn); kfree(inst); } static inline struct dh_safe_prime_instance_ctx *dh_safe_prime_instance_ctx( struct crypto_kpp *tfm) { return kpp_instance_ctx(kpp_alg_instance(tfm)); } static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm) { struct dh_safe_prime_instance_ctx *inst_ctx = dh_safe_prime_instance_ctx(tfm); struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm); tfm_ctx->dh_tfm = crypto_spawn_kpp(&inst_ctx->dh_spawn); if (IS_ERR(tfm_ctx->dh_tfm)) return PTR_ERR(tfm_ctx->dh_tfm); kpp_set_reqsize(tfm, sizeof(struct kpp_request) + crypto_kpp_reqsize(tfm_ctx->dh_tfm)); return 0; } static void dh_safe_prime_exit_tfm(struct crypto_kpp *tfm) { struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm); crypto_free_kpp(tfm_ctx->dh_tfm); } static u64 __add_u64_to_be(__be64 *dst, unsigned int n, u64 val) { unsigned int i; for (i = n; val && i > 0; --i) { u64 tmp = be64_to_cpu(dst[i - 1]); tmp += val; val = tmp >= val ? 0 : 1; dst[i - 1] = cpu_to_be64(tmp); } return val; } static void *dh_safe_prime_gen_privkey(const struct dh_safe_prime *safe_prime, unsigned int *key_size) { unsigned int n, oversampling_size; __be64 *key; int err; u64 h, o; /* * Generate a private key following NIST SP800-56Ar3, * sec. 5.6.1.1.1 and 5.6.1.1.3 resp.. * * 5.6.1.1.1: choose key length N such that * 2 * ->max_strength <= N <= log2(q) + 1 = ->p_size * 8 - 1 * with q = (p - 1) / 2 for the safe-prime groups. * Choose the lower bound's next power of two for N in order to * avoid excessively large private keys while still * maintaining some extra reserve beyond the bare minimum in * most cases. Note that for each entry in safe_prime_groups[], * the following holds for such N: * - N >= 256, in particular it is a multiple of 2^6 = 64 * bits and * - N < log2(q) + 1, i.e. N respects the upper bound. */ n = roundup_pow_of_two(2 * safe_prime->max_strength); WARN_ON_ONCE(n & ((1u << 6) - 1)); n >>= 6; /* Convert N into units of u64. */ /* * Reserve one extra u64 to hold the extra random bits * required as per 5.6.1.1.3. */ oversampling_size = (n + 1) * sizeof(__be64); key = kmalloc(oversampling_size, GFP_KERNEL); if (!key) return ERR_PTR(-ENOMEM); /* * 5.6.1.1.3, step 3 (and implicitly step 4): obtain N + 64 * random bits and interpret them as a big endian integer. */ err = -EFAULT; if (crypto_get_default_rng()) goto out_err; err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)key, oversampling_size); crypto_put_default_rng(); if (err) goto out_err; /* * 5.6.1.1.3, step 5 is implicit: 2^N < q and thus, * M = min(2^N, q) = 2^N. * * For step 6, calculate * key = (key[] mod (M - 1)) + 1 = (key[] mod (2^N - 1)) + 1. * * In order to avoid expensive divisions, note that * 2^N mod (2^N - 1) = 1 and thus, for any integer h, * 2^N * h mod (2^N - 1) = h mod (2^N - 1) always holds. * The big endian integer key[] composed of n + 1 64bit words * may be written as key[] = h * 2^N + l, with h = key[0] * representing the 64 most significant bits and l * corresponding to the remaining 2^N bits. With the remark * from above, * h * 2^N + l mod (2^N - 1) = l + h mod (2^N - 1). * As both, l and h are less than 2^N, their sum after * this first reduction is guaranteed to be <= 2^(N + 1) - 2. * Or equivalently, that their sum can again be written as * h' * 2^N + l' with h' now either zero or one and if one, * then l' <= 2^N - 2. Thus, all bits at positions >= N will * be zero after a second reduction: * h' * 2^N + l' mod (2^N - 1) = l' + h' mod (2^N - 1). * At this point, it is still possible that * l' + h' = 2^N - 1, i.e. that l' + h' mod (2^N - 1) * is zero. This condition will be detected below by means of * the final increment overflowing in this case. */ h = be64_to_cpu(key[0]); h = __add_u64_to_be(key + 1, n, h); h = __add_u64_to_be(key + 1, n, h); WARN_ON_ONCE(h); /* Increment to obtain the final result. */ o = __add_u64_to_be(key + 1, n, 1); /* * The overflow bit o from the increment is either zero or * one. If zero, key[1:n] holds the final result in big-endian * order. If one, key[1:n] is zero now, but needs to be set to * one, c.f. above. */ if (o) key[n] = cpu_to_be64(1); /* n is in units of u64, convert to bytes. */ *key_size = n << 3; /* Strip the leading extra __be64, which is (virtually) zero by now. */ memmove(key, &key[1], *key_size); return key; out_err: kfree_sensitive(key); return ERR_PTR(err); } static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { struct dh_safe_prime_instance_ctx *inst_ctx = dh_safe_prime_instance_ctx(tfm); struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm); struct dh params = {}; void *buf = NULL, *key = NULL; unsigned int buf_size; int err; if (buffer) { err = __crypto_dh_decode_key(buffer, len, ¶ms); if (err) return err; if (params.p_size || params.g_size) return -EINVAL; } params.p = inst_ctx->safe_prime->p; params.p_size = inst_ctx->safe_prime->p_size; params.g = safe_prime_g; params.g_size = sizeof(safe_prime_g); if (!params.key_size) { key = dh_safe_prime_gen_privkey(inst_ctx->safe_prime, ¶ms.key_size); if (IS_ERR(key)) return PTR_ERR(key); params.key = key; } buf_size = crypto_dh_key_len(¶ms); buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) { err = -ENOMEM; goto out; } err = crypto_dh_encode_key(buf, buf_size, ¶ms); if (err) goto out; err = crypto_kpp_set_secret(tfm_ctx->dh_tfm, buf, buf_size); out: kfree_sensitive(buf); kfree_sensitive(key); return err; } static void dh_safe_prime_complete_req(void *data, int err) { struct kpp_request *req = data; kpp_request_complete(req, err); } static struct kpp_request *dh_safe_prime_prepare_dh_req(struct kpp_request *req) { struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(crypto_kpp_reqtfm(req)); struct kpp_request *dh_req = kpp_request_ctx(req); kpp_request_set_tfm(dh_req, tfm_ctx->dh_tfm); kpp_request_set_callback(dh_req, req->base.flags, dh_safe_prime_complete_req, req); kpp_request_set_input(dh_req, req->src, req->src_len); kpp_request_set_output(dh_req, req->dst, req->dst_len); return dh_req; } static int dh_safe_prime_generate_public_key(struct kpp_request *req) { struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req); return crypto_kpp_generate_public_key(dh_req); } static int dh_safe_prime_compute_shared_secret(struct kpp_request *req) { struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req); return crypto_kpp_compute_shared_secret(dh_req); } static unsigned int dh_safe_prime_max_size(struct crypto_kpp *tfm) { struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm); return crypto_kpp_maxsize(tfm_ctx->dh_tfm); } static int __maybe_unused __dh_safe_prime_create( struct crypto_template *tmpl, struct rtattr **tb, const struct dh_safe_prime *safe_prime) { struct kpp_instance *inst; struct dh_safe_prime_instance_ctx *ctx; const char *dh_name; struct kpp_alg *dh_alg; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_KPP, &mask); if (err) return err; dh_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(dh_name)) return PTR_ERR(dh_name); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = kpp_instance_ctx(inst); err = crypto_grab_kpp(&ctx->dh_spawn, kpp_crypto_instance(inst), dh_name, 0, mask); if (err) goto err_free_inst; err = -EINVAL; dh_alg = crypto_spawn_kpp_alg(&ctx->dh_spawn); if (strcmp(dh_alg->base.cra_name, "dh")) goto err_free_inst; ctx->safe_prime = safe_prime; err = crypto_inst_setname(kpp_crypto_instance(inst), tmpl->name, &dh_alg->base); if (err) goto err_free_inst; inst->alg.set_secret = dh_safe_prime_set_secret; inst->alg.generate_public_key = dh_safe_prime_generate_public_key; inst->alg.compute_shared_secret = dh_safe_prime_compute_shared_secret; inst->alg.max_size = dh_safe_prime_max_size; inst->alg.init = dh_safe_prime_init_tfm; inst->alg.exit = dh_safe_prime_exit_tfm; inst->alg.base.cra_priority = dh_alg->base.cra_priority; inst->alg.base.cra_module = THIS_MODULE; inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx); inst->free = dh_safe_prime_free_instance; err = kpp_register_instance(tmpl, inst); if (err) goto err_free_inst; return 0; err_free_inst: dh_safe_prime_free_instance(inst); return err; } #ifdef CONFIG_CRYPTO_DH_RFC7919_GROUPS static const struct dh_safe_prime ffdhe2048_prime = { .max_strength = 112, .p_size = 256, .p = "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a" "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95" "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9" "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a" "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0" "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35" "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72" "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a" "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb" "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4" "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70" "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61" "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83" "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05" "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa" "\x88\x6b\x42\x38\x61\x28\x5c\x97\xff\xff\xff\xff\xff\xff\xff\xff", }; static const struct dh_safe_prime ffdhe3072_prime = { .max_strength = 128, .p_size = 384, .p = "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a" "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95" "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9" "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a" "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0" "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35" "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72" "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a" "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb" "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4" "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70" "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61" "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83" "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05" "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa" "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b" "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07" "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c" "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44" "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff" "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d" "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e" "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c" "\x25\xe4\x1d\x2b\x66\xc6\x2e\x37\xff\xff\xff\xff\xff\xff\xff\xff", }; static const struct dh_safe_prime ffdhe4096_prime = { .max_strength = 152, .p_size = 512, .p = "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a" "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95" "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9" "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a" "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0" "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35" "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72" "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a" "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb" "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4" "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70" "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61" "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83" "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05" "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa" "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b" "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07" "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c" "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44" "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff" "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d" "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e" "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c" "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb" "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18" "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a" "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32" "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38" "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c" "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf" "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1" "\xc6\x8a\x00\x7e\x5e\x65\x5f\x6a\xff\xff\xff\xff\xff\xff\xff\xff", }; static const struct dh_safe_prime ffdhe6144_prime = { .max_strength = 176, .p_size = 768, .p = "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a" "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95" "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9" "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a" "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0" "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35" "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72" "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a" "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb" "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4" "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70" "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61" "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83" "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05" "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa" "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b" "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07" "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c" "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44" "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff" "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d" "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e" "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c" "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb" "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18" "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a" "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32" "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38" "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c" "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf" "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1" "\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a" "\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6" "\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c" "\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71" "\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77" "\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8" "\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e" "\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4" "\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92" "\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82" "\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c" "\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46" "\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17" "\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04" "\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69" "\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4" "\xa4\x0e\x32\x9c\xd0\xe4\x0e\x65\xff\xff\xff\xff\xff\xff\xff\xff", }; static const struct dh_safe_prime ffdhe8192_prime = { .max_strength = 200, .p_size = 1024, .p = "\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a" "\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95" "\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9" "\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a" "\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0" "\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35" "\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72" "\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a" "\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb" "\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4" "\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70" "\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61" "\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83" "\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05" "\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa" "\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b" "\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07" "\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c" "\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44" "\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff" "\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d" "\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e" "\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c" "\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb" "\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18" "\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a" "\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32" "\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38" "\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c" "\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf" "\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1" "\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a" "\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6" "\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c" "\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71" "\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77" "\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8" "\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e" "\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4" "\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92" "\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82" "\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c" "\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46" "\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17" "\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04" "\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69" "\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4" "\xa4\x0e\x32\x9c\xcf\xf4\x6a\xaa\x36\xad\x00\x4c\xf6\x00\xc8\x38" "\x1e\x42\x5a\x31\xd9\x51\xae\x64\xfd\xb2\x3f\xce\xc9\x50\x9d\x43" "\x68\x7f\xeb\x69\xed\xd1\xcc\x5e\x0b\x8c\xc3\xbd\xf6\x4b\x10\xef" "\x86\xb6\x31\x42\xa3\xab\x88\x29\x55\x5b\x2f\x74\x7c\x93\x26\x65" "\xcb\x2c\x0f\x1c\xc0\x1b\xd7\x02\x29\x38\x88\x39\xd2\xaf\x05\xe4" "\x54\x50\x4a\xc7\x8b\x75\x82\x82\x28\x46\xc0\xba\x35\xc3\x5f\x5c" "\x59\x16\x0c\xc0\x46\xfd\x82\x51\x54\x1f\xc6\x8c\x9c\x86\xb0\x22" "\xbb\x70\x99\x87\x6a\x46\x0e\x74\x51\xa8\xa9\x31\x09\x70\x3f\xee" "\x1c\x21\x7e\x6c\x38\x26\xe5\x2c\x51\xaa\x69\x1e\x0e\x42\x3c\xfc" "\x99\xe9\xe3\x16\x50\xc1\x21\x7b\x62\x48\x16\xcd\xad\x9a\x95\xf9" "\xd5\xb8\x01\x94\x88\xd9\xc0\xa0\xa1\xfe\x30\x75\xa5\x77\xe2\x31" "\x83\xf8\x1d\x4a\x3f\x2f\xa4\x57\x1e\xfc\x8c\xe0\xba\x8a\x4f\xe8" "\xb6\x85\x5d\xfe\x72\xb0\xa6\x6e\xde\xd2\xfb\xab\xfb\xe5\x8a\x30" "\xfa\xfa\xbe\x1c\x5d\x71\xa8\x7e\x2f\x74\x1e\xf8\xc1\xfe\x86\xfe" "\xa6\xbb\xfd\xe5\x30\x67\x7f\x0d\x97\xd1\x1d\x49\xf7\xa8\x44\x3d" "\x08\x22\xe5\x06\xa9\xf4\x61\x4e\x01\x1e\x2a\x94\x83\x8f\xf8\x8c" "\xd6\x8c\x8b\xb7\xc5\xc6\x42\x4c\xff\xff\xff\xff\xff\xff\xff\xff", }; static int dh_ffdhe2048_create(struct crypto_template *tmpl, struct rtattr **tb) { return __dh_safe_prime_create(tmpl, tb, &ffdhe2048_prime); } static int dh_ffdhe3072_create(struct crypto_template *tmpl, struct rtattr **tb) { return __dh_safe_prime_create(tmpl, tb, &ffdhe3072_prime); } static int dh_ffdhe4096_create(struct crypto_template *tmpl, struct rtattr **tb) { return __dh_safe_prime_create(tmpl, tb, &ffdhe4096_prime); } static int dh_ffdhe6144_create(struct crypto_template *tmpl, struct rtattr **tb) { return __dh_safe_prime_create(tmpl, tb, &ffdhe6144_prime); } static int dh_ffdhe8192_create(struct crypto_template *tmpl, struct rtattr **tb) { return __dh_safe_prime_create(tmpl, tb, &ffdhe8192_prime); } static struct crypto_template crypto_ffdhe_templates[] = { { .name = "ffdhe2048", .create = dh_ffdhe2048_create, .module = THIS_MODULE, }, { .name = "ffdhe3072", .create = dh_ffdhe3072_create, .module = THIS_MODULE, }, { .name = "ffdhe4096", .create = dh_ffdhe4096_create, .module = THIS_MODULE, }, { .name = "ffdhe6144", .create = dh_ffdhe6144_create, .module = THIS_MODULE, }, { .name = "ffdhe8192", .create = dh_ffdhe8192_create, .module = THIS_MODULE, }, }; #else /* ! CONFIG_CRYPTO_DH_RFC7919_GROUPS */ static struct crypto_template crypto_ffdhe_templates[] = {}; #endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */ static int __init dh_init(void) { int err; err = crypto_register_kpp(&dh); if (err) return err; err = crypto_register_templates(crypto_ffdhe_templates, ARRAY_SIZE(crypto_ffdhe_templates)); if (err) { crypto_unregister_kpp(&dh); return err; } return 0; } static void __exit dh_exit(void) { crypto_unregister_templates(crypto_ffdhe_templates, ARRAY_SIZE(crypto_ffdhe_templates)); crypto_unregister_kpp(&dh); } module_init(dh_init); module_exit(dh_exit); MODULE_ALIAS_CRYPTO("dh"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DH generic algorithm"); |
| 39 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "timers.h" #include "device.h" #include "peer.h" #include "queueing.h" #include "socket.h" /* * - Timer for retransmitting the handshake if we don't hear back after * `REKEY_TIMEOUT + jitter` ms. * * - Timer for sending empty packet if we have received a packet but after have * not sent one for `KEEPALIVE_TIMEOUT` ms. * * - Timer for initiating new handshake if we have sent a packet but after have * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + * jitter` ms. * * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms * if no new keys have been received. * * - Timer for, if enabled, sending an empty authenticated packet every user- * specified seconds. */ static inline void mod_peer_timer(struct wg_peer *peer, struct timer_list *timer, unsigned long expires) { rcu_read_lock_bh(); if (likely(netif_running(peer->device->dev) && !READ_ONCE(peer->is_dead))) mod_timer(timer, expires); rcu_read_unlock_bh(); } static void wg_expired_retransmit_handshake(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_retransmit_handshake); if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2); timer_delete(&peer->timer_send_keepalive); /* We drop all packets without a keypair and don't try again, * if we try unsuccessfully for too long to make a handshake. */ wg_packet_purge_staged_packets(peer); /* We set a timer for destroying any residue that might be left * of a partial exchange. */ if (!timer_pending(&peer->timer_zero_key_material)) mod_peer_timer(peer, &peer->timer_zero_key_material, jiffies + REJECT_AFTER_TIME * 3 * HZ); } else { ++peer->timer_handshake_attempts; pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)REKEY_TIMEOUT, peer->timer_handshake_attempts + 1); /* We clear the endpoint address src address, in case this is * the cause of trouble. */ wg_socket_clear_peer_endpoint_src(peer); wg_packet_send_queued_handshake_initiation(peer, true); } } static void wg_expired_send_keepalive(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_send_keepalive); wg_packet_send_keepalive(peer); if (peer->timer_need_another_keepalive) { peer->timer_need_another_keepalive = false; mod_peer_timer(peer, &peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT * HZ); } } static void wg_expired_new_handshake(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_new_handshake); pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)); /* We clear the endpoint address src address, in case this is the cause * of trouble. */ wg_socket_clear_peer_endpoint_src(peer); wg_packet_send_queued_handshake_initiation(peer, false); } static void wg_expired_zero_key_material(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_zero_key_material); rcu_read_lock_bh(); if (!READ_ONCE(peer->is_dead)) { wg_peer_get(peer); if (!queue_work(peer->device->handshake_send_wq, &peer->clear_peer_work)) /* If the work was already on the queue, we want to drop * the extra reference. */ wg_peer_put(peer); } rcu_read_unlock_bh(); } static void wg_queued_expired_zero_key_material(struct work_struct *work) { struct wg_peer *peer = container_of(work, struct wg_peer, clear_peer_work); pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); wg_peer_put(peer); } static void wg_expired_send_persistent_keepalive(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_persistent_keepalive); if (likely(peer->persistent_keepalive_interval)) wg_packet_send_keepalive(peer); } /* Should be called after an authenticated data packet is sent. */ void wg_timers_data_sent(struct wg_peer *peer) { if (!timer_pending(&peer->timer_new_handshake)) mod_peer_timer(peer, &peer->timer_new_handshake, jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + get_random_u32_below(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); } /* Should be called after an authenticated data packet is received. */ void wg_timers_data_received(struct wg_peer *peer) { if (likely(netif_running(peer->device->dev))) { if (!timer_pending(&peer->timer_send_keepalive)) mod_peer_timer(peer, &peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT * HZ); else peer->timer_need_another_keepalive = true; } } /* Should be called after any type of authenticated packet is sent, whether * keepalive, data, or handshake. */ void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) { timer_delete(&peer->timer_send_keepalive); } /* Should be called after any type of authenticated packet is received, whether * keepalive, data, or handshake. */ void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) { timer_delete(&peer->timer_new_handshake); } /* Should be called after a handshake initiation message is sent. */ void wg_timers_handshake_initiated(struct wg_peer *peer) { mod_peer_timer(peer, &peer->timer_retransmit_handshake, jiffies + REKEY_TIMEOUT * HZ + get_random_u32_below(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); } /* Should be called after a handshake response message is received and processed * or when getting key confirmation via the first data message. */ void wg_timers_handshake_complete(struct wg_peer *peer) { timer_delete(&peer->timer_retransmit_handshake); peer->timer_handshake_attempts = 0; peer->sent_lastminute_handshake = false; ktime_get_real_ts64(&peer->walltime_last_handshake); } /* Should be called after an ephemeral key is created, which is before sending a * handshake response or after receiving a handshake response. */ void wg_timers_session_derived(struct wg_peer *peer) { mod_peer_timer(peer, &peer->timer_zero_key_material, jiffies + REJECT_AFTER_TIME * 3 * HZ); } /* Should be called before a packet with authentication, whether * keepalive, data, or handshakem is sent, or after one is received. */ void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) { if (peer->persistent_keepalive_interval) mod_peer_timer(peer, &peer->timer_persistent_keepalive, jiffies + peer->persistent_keepalive_interval * HZ); } void wg_timers_init(struct wg_peer *peer) { timer_setup(&peer->timer_retransmit_handshake, wg_expired_retransmit_handshake, 0); timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); timer_setup(&peer->timer_zero_key_material, wg_expired_zero_key_material, 0); timer_setup(&peer->timer_persistent_keepalive, wg_expired_send_persistent_keepalive, 0); INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); peer->timer_handshake_attempts = 0; peer->sent_lastminute_handshake = false; peer->timer_need_another_keepalive = false; } void wg_timers_stop(struct wg_peer *peer) { timer_delete_sync(&peer->timer_retransmit_handshake); timer_delete_sync(&peer->timer_send_keepalive); timer_delete_sync(&peer->timer_new_handshake); timer_delete_sync(&peer->timer_zero_key_material); timer_delete_sync(&peer->timer_persistent_keepalive); flush_work(&peer->clear_peer_work); } |
| 89 1 81 79 81 89 89 82 82 27 27 88 89 45 35 10 250 222 30 112 177 130 56 2 2 63 4 55 5 15 1 16 16 63 16 6 41 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/switchdev.h> #include "br_private.h" static struct static_key_false br_switchdev_tx_fwd_offload; static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p, const struct sk_buff *skb) { if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) return false; if (br_multicast_igmp_type(skb)) return false; return (p->flags & BR_TX_FWD_OFFLOAD) && (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom); } bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) return false; return BR_INPUT_SKB_CB(skb)->tx_fwd_offload; } void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) { skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb); } /* Mark the frame for TX forwarding offload if this egress port supports it */ void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, struct sk_buff *skb) { if (nbp_switchdev_can_offload_tx_fwd(p, skb)) BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true; } /* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms * that the skb has been already forwarded to, to avoid further cloning to * other ports in the same hwdom by making nbp_switchdev_allowed_egress() * return false. */ void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, struct sk_buff *skb) { if (nbp_switchdev_can_offload_tx_fwd(p, skb)) set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms); } void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb) { if (p->hwdom) BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom; } bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb) { struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb); return !test_bit(p->hwdom, &cb->fwd_hwdoms) && (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom); } /* Flags that can be offloaded to hardware */ #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | BR_PORT_MAB | \ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \ BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST) int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, unsigned long mask, struct netlink_ext_ack *extack) { struct switchdev_attr attr = { .orig_dev = p->dev, }; struct switchdev_notifier_port_attr_info info = { .attr = &attr, }; int err; mask &= BR_PORT_FLAGS_HW_OFFLOAD; if (!mask) return 0; attr.id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS; attr.u.brport_flags.val = flags; attr.u.brport_flags.mask = mask; /* We run from atomic context here */ err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, &info.info, extack); err = notifier_to_errno(err); if (err == -EOPNOTSUPP) return 0; if (err) { NL_SET_ERR_MSG_WEAK_MOD(extack, "bridge flag offload is not supported"); return -EOPNOTSUPP; } attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS; attr.flags = SWITCHDEV_F_DEFER; err = switchdev_port_attr_set(p->dev, &attr, extack); if (err) { NL_SET_ERR_MSG_WEAK_MOD(extack, "error setting offload flag on port"); return err; } return 0; } static void br_switchdev_fdb_populate(struct net_bridge *br, struct switchdev_notifier_fdb_info *item, const struct net_bridge_fdb_entry *fdb, const void *ctx) { const struct net_bridge_port *p = READ_ONCE(fdb->dst); item->addr = fdb->key.addr.addr; item->vid = fdb->key.vlan_id; item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); item->locked = false; item->info.dev = (!p || item->is_local) ? br->dev : p->dev; item->info.ctx = ctx; } void br_switchdev_fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type) { struct switchdev_notifier_fdb_info item; if (test_bit(BR_FDB_LOCKED, &fdb->flags)) return; /* Entries with these flags were created using ndm_state == NUD_REACHABLE, * ndm_flags == NTF_MASTER( | NTF_STICKY), ext_flags == 0 by something * equivalent to 'bridge fdb add ... master dynamic (sticky)'. * Drivers don't know how to deal with these, so don't notify them to * avoid confusing them. */ if (test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags) && !test_bit(BR_FDB_STATIC, &fdb->flags) && !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) return; br_switchdev_fdb_populate(br, &item, fdb, NULL); switch (type) { case RTM_DELNEIGH: call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE, item.info.dev, &item.info, NULL); break; case RTM_NEWNEIGH: call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE, item.info.dev, &item.info, NULL); break; } } int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, bool changed, struct netlink_ext_ack *extack) { struct switchdev_obj_port_vlan v = { .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, .vid = vid, .changed = changed, }; return switchdev_port_obj_add(dev, &v.obj, extack); } int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { struct switchdev_obj_port_vlan v = { .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid = vid, }; return switchdev_port_obj_del(dev, &v.obj); } static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining) { struct net_bridge *br = joining->br; struct net_bridge_port *p; int hwdom; /* joining is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) { joining->hwdom = p->hwdom; return 0; } } hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1); if (hwdom >= BR_HWDOM_MAX) return -EBUSY; set_bit(hwdom, &br->busy_hwdoms); joining->hwdom = hwdom; return 0; } static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving) { struct net_bridge *br = leaving->br; struct net_bridge_port *p; /* leaving is no longer in the port list. */ list_for_each_entry(p, &br->port_list, list) { if (p->hwdom == leaving->hwdom) return; } clear_bit(leaving->hwdom, &br->busy_hwdoms); } static int nbp_switchdev_add(struct net_bridge_port *p, struct netdev_phys_item_id ppid, bool tx_fwd_offload, struct netlink_ext_ack *extack) { int err; if (p->offload_count) { /* Prevent unsupported configurations such as a bridge port * which is a bonding interface, and the member ports are from * different hardware switches. */ if (!netdev_phys_item_id_same(&p->ppid, &ppid)) { NL_SET_ERR_MSG_MOD(extack, "Same bridge port cannot be offloaded by two physical switches"); return -EBUSY; } /* Tolerate drivers that call switchdev_bridge_port_offload() * more than once for the same bridge port, such as when the * bridge port is an offloaded bonding/team interface. */ p->offload_count++; return 0; } p->ppid = ppid; p->offload_count = 1; err = nbp_switchdev_hwdom_set(p); if (err) return err; if (tx_fwd_offload) { p->flags |= BR_TX_FWD_OFFLOAD; static_branch_inc(&br_switchdev_tx_fwd_offload); } return 0; } static void nbp_switchdev_del(struct net_bridge_port *p) { if (WARN_ON(!p->offload_count)) return; p->offload_count--; if (p->offload_count) return; if (p->hwdom) nbp_switchdev_hwdom_put(p); if (p->flags & BR_TX_FWD_OFFLOAD) { p->flags &= ~BR_TX_FWD_OFFLOAD; static_branch_dec(&br_switchdev_tx_fwd_offload); } } static int br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, const struct net_bridge_fdb_entry *fdb, unsigned long action, const void *ctx) { struct switchdev_notifier_fdb_info item; int err; br_switchdev_fdb_populate(br, &item, fdb, ctx); err = nb->notifier_call(nb, action, &item); return notifier_to_errno(err); } static int br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding, struct notifier_block *nb) { struct net_bridge_fdb_entry *fdb; struct net_bridge *br; unsigned long action; int err = 0; if (!nb) return 0; if (!netif_is_bridge_master(br_dev)) return -EINVAL; br = netdev_priv(br_dev); if (adding) action = SWITCHDEV_FDB_ADD_TO_DEVICE; else action = SWITCHDEV_FDB_DEL_TO_DEVICE; rcu_read_lock(); hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) { err = br_switchdev_fdb_replay_one(br, nb, fdb, action, ctx); if (err) break; } rcu_read_unlock(); return err; } static int br_switchdev_vlan_attr_replay(struct net_device *br_dev, const void *ctx, struct notifier_block *nb, struct netlink_ext_ack *extack) { struct switchdev_notifier_port_attr_info attr_info = { .info = { .dev = br_dev, .extack = extack, .ctx = ctx, }, }; struct net_bridge *br = netdev_priv(br_dev); struct net_bridge_vlan_group *vg; struct switchdev_attr attr; struct net_bridge_vlan *v; int err; attr_info.attr = &attr; attr.orig_dev = br_dev; vg = br_vlan_group(br); if (!vg) return 0; list_for_each_entry(v, &vg->vlan_list, vlist) { if (v->msti) { attr.id = SWITCHDEV_ATTR_ID_VLAN_MSTI; attr.u.vlan_msti.vid = v->vid; attr.u.vlan_msti.msti = v->msti; err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET, &attr_info); err = notifier_to_errno(err); if (err) return err; } } return 0; } static int br_switchdev_vlan_replay_one(struct notifier_block *nb, struct net_device *dev, struct switchdev_obj_port_vlan *vlan, const void *ctx, unsigned long action, struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { .dev = dev, .extack = extack, .ctx = ctx, }, .obj = &vlan->obj, }; int err; err = nb->notifier_call(nb, action, &obj_info); return notifier_to_errno(err); } static int br_switchdev_vlan_replay_group(struct notifier_block *nb, struct net_device *dev, struct net_bridge_vlan_group *vg, const void *ctx, unsigned long action, struct netlink_ext_ack *extack) { struct net_bridge_vlan *v; int err = 0; u16 pvid; if (!vg) return 0; pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct switchdev_obj_port_vlan vlan = { .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = br_vlan_flags(v, pvid), .vid = v->vid, }; if (!br_vlan_should_use(v)) continue; err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); if (err) return err; } return 0; } static int br_switchdev_vlan_replay(struct net_device *br_dev, const void *ctx, bool adding, struct notifier_block *nb, struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(br_dev); struct net_bridge_port *p; unsigned long action; int err; ASSERT_RTNL(); if (!nb) return 0; if (!netif_is_bridge_master(br_dev)) return -EINVAL; if (adding) action = SWITCHDEV_PORT_OBJ_ADD; else action = SWITCHDEV_PORT_OBJ_DEL; err = br_switchdev_vlan_replay_group(nb, br_dev, br_vlan_group(br), ctx, action, extack); if (err) return err; list_for_each_entry(p, &br->port_list, list) { struct net_device *dev = p->dev; err = br_switchdev_vlan_replay_group(nb, dev, nbp_vlan_group(p), ctx, action, extack); if (err) return err; } if (adding) { err = br_switchdev_vlan_attr_replay(br_dev, ctx, nb, extack); if (err) return err; } return 0; } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct br_switchdev_mdb_complete_info { struct net_bridge_port *port; struct br_ip ip; }; static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *priv) { struct br_switchdev_mdb_complete_info *data = priv; struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; struct net_bridge_mdb_entry *mp; struct net_bridge_port *port = data->port; struct net_bridge *br = port->br; u8 old_flags; if (err == -EOPNOTSUPP) goto out_free; spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &data->ip); if (!mp) goto out; for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port != port) continue; old_flags = p->flags; br_multicast_set_pg_offload_flags(p, !err); if (br_mdb_should_notify(br, old_flags ^ p->flags)) br_mdb_flag_change_notify(br->dev, mp, p); } out: spin_unlock_bh(&br->multicast_lock); out_free: kfree(priv); } static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb, const struct net_bridge_mdb_entry *mp) { if (mp->addr.proto == htons(ETH_P_IP)) ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr); #if IS_ENABLED(CONFIG_IPV6) else if (mp->addr.proto == htons(ETH_P_IPV6)) ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr); #endif else ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr); mdb->vid = mp->addr.vid; } static void br_switchdev_host_mdb_one(struct net_device *dev, struct net_device *lower_dev, struct net_bridge_mdb_entry *mp, int type) { struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_HOST_MDB, .flags = SWITCHDEV_F_DEFER, .orig_dev = dev, }, }; br_switchdev_mdb_populate(&mdb, mp); switch (type) { case RTM_NEWMDB: switchdev_port_obj_add(lower_dev, &mdb.obj, NULL); break; case RTM_DELMDB: switchdev_port_obj_del(lower_dev, &mdb.obj); break; } } static void br_switchdev_host_mdb(struct net_device *dev, struct net_bridge_mdb_entry *mp, int type) { struct net_device *lower_dev; struct list_head *iter; netdev_for_each_lower_dev(dev, lower_dev, iter) br_switchdev_host_mdb_one(dev, lower_dev, mp, type); } static int br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, const struct switchdev_obj_port_mdb *mdb, unsigned long action, const void *ctx, struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { .dev = dev, .extack = extack, .ctx = ctx, }, .obj = &mdb->obj, }; int err; err = nb->notifier_call(nb, action, &obj_info); return notifier_to_errno(err); } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, struct net_device *dev, unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { struct switchdev_obj_port_mdb mdb = { .obj = { .id = id, .orig_dev = orig_dev, }, }; struct switchdev_obj_port_mdb *pmdb; br_switchdev_mdb_populate(&mdb, mp); if (action == SWITCHDEV_PORT_OBJ_ADD && switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { /* This event is already in the deferred queue of * events, so this replay must be elided, lest the * driver receives duplicate events for it. This can * only happen when replaying additions, since * modifications are always immediately visible in * br->mdb_list, whereas actual event delivery may be * delayed. */ return 0; } pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); if (!pmdb) return -ENOMEM; list_add_tail(&pmdb->obj.list, mdb_list); return 0; } void br_switchdev_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type) { struct br_switchdev_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_PORT_MDB, .flags = SWITCHDEV_F_DEFER, }, }; if (!pg) return br_switchdev_host_mdb(dev, mp, type); br_switchdev_mdb_populate(&mdb, mp); mdb.obj.orig_dev = pg->key.port->dev; switch (type) { case RTM_NEWMDB: complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); if (!complete_info) break; complete_info->port = pg->key.port; complete_info->ip = mp->addr; mdb.obj.complete_priv = complete_info; mdb.obj.complete = br_switchdev_mdb_complete; if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL)) kfree(complete_info); break; case RTM_DELMDB: switchdev_port_obj_del(pg->key.port->dev, &mdb.obj); break; } } #endif static int br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, const void *ctx, bool adding, struct notifier_block *nb, struct netlink_ext_ack *extack) { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING const struct net_bridge_mdb_entry *mp; struct switchdev_obj *obj, *tmp; struct net_bridge *br; unsigned long action; LIST_HEAD(mdb_list); int err = 0; ASSERT_RTNL(); if (!nb) return 0; if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) return -EINVAL; br = netdev_priv(br_dev); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; if (adding) action = SWITCHDEV_PORT_OBJ_ADD; else action = SWITCHDEV_PORT_OBJ_DEL; /* br_switchdev_mdb_queue_one() will take care to not queue a * replay of an event that is already pending in the switchdev * deferred queue. In order to safely determine that, there * must be no new deferred MDB notifications enqueued for the * duration of the MDB scan. Therefore, grab the write-side * lock to avoid racing with any concurrent IGMP/MLD snooping. */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj), action, ctx, extack); if (err == -EOPNOTSUPP) err = 0; if (err) goto out_free_mdb; } out_free_mdb: list_for_each_entry_safe(obj, tmp, &mdb_list, list) { list_del(&obj->list); kfree(SWITCHDEV_OBJ_PORT_MDB(obj)); } if (err) return err; #endif return 0; } static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, struct netlink_ext_ack *extack) { struct net_device *br_dev = p->br->dev; struct net_device *dev = p->dev; int err; err = br_switchdev_vlan_replay(br_dev, ctx, true, blocking_nb, extack); if (err && err != -EOPNOTSUPP) return err; err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack); if (err) { /* -EOPNOTSUPP not propagated from MDB replay. */ return err; } err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb); if (err && err != -EOPNOTSUPP) return err; return 0; } static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb) { struct net_device *br_dev = p->br->dev; struct net_device *dev = p->dev; br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb); br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); /* Make sure that the device leaving this bridge has seen all * relevant events before it is disassociated. In the normal * case, when the device is directly attached to the bridge, * this is covered by del_nbp(). If the association was indirect * however, e.g. via a team or bond, and the device is leaving * that intermediate device, then the bridge port remains in * place. */ switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a * switchdev hardware domain to it. */ int br_switchdev_port_offload(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, bool tx_fwd_offload, struct netlink_ext_ack *extack) { struct netdev_phys_item_id ppid; int err; err = netif_get_port_parent_id(dev, &ppid, false); if (err) return err; err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack); if (err) return err; err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack); if (err) goto out_switchdev_del; return 0; out_switchdev_del: nbp_switchdev_del(p); return err; } void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb) { nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); nbp_switchdev_del(p); } int br_switchdev_port_replay(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, struct netlink_ext_ack *extack) { return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack); } |
| 149 149 149 149 149 149 134 132 131 132 132 130 130 1 26 108 5 130 132 149 149 2 2 2 1 132 132 6420 6420 6423 9 12 6419 14 17 17 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 | // SPDX-License-Identifier: GPL-2.0 /* * Workingset detection * * Copyright (C) 2013 Red Hat, Inc., Johannes Weiner */ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/shmem_fs.h> #include <linux/pagemap.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/swap.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> #include "internal.h" /* * Double CLOCK lists * * Per node, two clock lists are maintained for file pages: the * inactive and the active list. Freshly faulted pages start out at * the head of the inactive list and page reclaim scans pages from the * tail. Pages that are accessed multiple times on the inactive list * are promoted to the active list, to protect them from reclaim, * whereas active pages are demoted to the inactive list when the * active list grows too big. * * fault ------------------------+ * | * +--------------+ | +-------------+ * reclaim <- | inactive | <-+-- demotion | active | <--+ * +--------------+ +-------------+ | * | | * +-------------- promotion ------------------+ * * * Access frequency and refault distance * * A workload is thrashing when its pages are frequently used but they * are evicted from the inactive list every time before another access * would have promoted them to the active list. * * In cases where the average access distance between thrashing pages * is bigger than the size of memory there is nothing that can be * done - the thrashing set could never fit into memory under any * circumstance. * * However, the average access distance could be bigger than the * inactive list, yet smaller than the size of memory. In this case, * the set could fit into memory if it weren't for the currently * active pages - which may be used more, hopefully less frequently: * * +-memory available to cache-+ * | | * +-inactive------+-active----+ * a b | c d e f g h i | J K L M N | * +---------------+-----------+ * * It is prohibitively expensive to accurately track access frequency * of pages. But a reasonable approximation can be made to measure * thrashing on the inactive list, after which refaulting pages can be * activated optimistically to compete with the existing active pages. * * Approximating inactive page access frequency - Observations: * * 1. When a page is accessed for the first time, it is added to the * head of the inactive list, slides every existing inactive page * towards the tail by one slot, and pushes the current tail page * out of memory. * * 2. When a page is accessed for the second time, it is promoted to * the active list, shrinking the inactive list by one slot. This * also slides all inactive pages that were faulted into the cache * more recently than the activated page towards the tail of the * inactive list. * * Thus: * * 1. The sum of evictions and activations between any two points in * time indicate the minimum number of inactive pages accessed in * between. * * 2. Moving one inactive page N page slots towards the tail of the * list requires at least N inactive page accesses. * * Combining these: * * 1. When a page is finally evicted from memory, the number of * inactive pages accessed while the page was in cache is at least * the number of page slots on the inactive list. * * 2. In addition, measuring the sum of evictions and activations (E) * at the time of a page's eviction, and comparing it to another * reading (R) at the time the page faults back into memory tells * the minimum number of accesses while the page was not cached. * This is called the refault distance. * * Because the first access of the page was the fault and the second * access the refault, we combine the in-cache distance with the * out-of-cache distance to get the complete minimum access distance * of this page: * * NR_inactive + (R - E) * * And knowing the minimum access distance of a page, we can easily * tell if the page would be able to stay in cache assuming all page * slots in the cache were available: * * NR_inactive + (R - E) <= NR_inactive + NR_active * * If we have swap we should consider about NR_inactive_anon and * NR_active_anon, so for page cache and anonymous respectively: * * NR_inactive_file + (R - E) <= NR_inactive_file + NR_active_file * + NR_inactive_anon + NR_active_anon * * NR_inactive_anon + (R - E) <= NR_inactive_anon + NR_active_anon * + NR_inactive_file + NR_active_file * * Which can be further simplified to: * * (R - E) <= NR_active_file + NR_inactive_anon + NR_active_anon * * (R - E) <= NR_active_anon + NR_inactive_file + NR_active_file * * Put into words, the refault distance (out-of-cache) can be seen as * a deficit in inactive list space (in-cache). If the inactive list * had (R - E) more page slots, the page would not have been evicted * in between accesses, but activated instead. And on a full system, * the only thing eating into inactive list space is active pages. * * * Refaulting inactive pages * * All that is known about the active list is that the pages have been * accessed more than once in the past. This means that at any given * time there is actually a good chance that pages on the active list * are no longer in active use. * * So when a refault distance of (R - E) is observed and there are at * least (R - E) pages in the userspace workingset, the refaulting page * is activated optimistically in the hope that (R - E) pages are actually * used less frequently than the refaulting page - or even not used at * all anymore. * * That means if inactive cache is refaulting with a suitable refault * distance, we assume the cache workingset is transitioning and put * pressure on the current workingset. * * If this is wrong and demotion kicks in, the pages which are truly * used more frequently will be reactivated while the less frequently * used once will be evicted from memory. * * But if this is right, the stale pages will be pushed out of memory * and the used pages get to stay in cache. * * Refaulting active pages * * If on the other hand the refaulting pages have recently been * deactivated, it means that the active list is no longer protecting * actively used cache from reclaim. The cache is NOT transitioning to * a different workingset; the existing workingset is thrashing in the * space allocated to the page cache. * * * Implementation * * For each node's LRU lists, a counter for inactive evictions and * activations is maintained (node->nonresident_age). * * On eviction, a snapshot of this counter (along with some bits to * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible * refault distance will immediately activate the refaulting page. */ #define WORKINGSET_SHIFT 1 #define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ WORKINGSET_SHIFT + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* * Eviction timestamps need to be able to cover the full range of * actionable refaults. However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group * evictions into coarser buckets by shaving off lower timestamp bits. */ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; eviction = (eviction << WORKINGSET_SHIFT) | workingset; return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp, bool *workingsetp) { unsigned long entry = xa_to_value(shadow); int memcgid, nid; bool workingset; workingset = entry & ((1UL << WORKINGSET_SHIFT) - 1); entry >>= WORKINGSET_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); entry >>= MEM_CGROUP_ID_SHIFT; *memcgidp = memcgid; *pgdat = NODE_DATA(nid); *evictionp = entry; *workingsetp = workingset; } #ifdef CONFIG_LRU_GEN static void *lru_gen_eviction(struct folio *folio) { int hist; unsigned long token; unsigned long min_seq; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); bool workingset = folio_test_workingset(folio); int tier = lru_tier_from_refs(refs, workingset); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); lruvec = mem_cgroup_lruvec(memcg, pgdat); lrugen = &lruvec->lrugen; min_seq = READ_ONCE(lrugen->min_seq[type]); token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0); hist = lru_hist_from_seq(min_seq); atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset); } /* * Tests if the shadow entry is for a folio that was recently evicted. * Fills in @lruvec, @token, @workingset with the values unpacked from shadow. */ static bool lru_gen_test_recent(void *shadow, struct lruvec **lruvec, unsigned long *token, bool *workingset) { int memcg_id; unsigned long max_seq; struct mem_cgroup *memcg; struct pglist_data *pgdat; unpack_shadow(shadow, &memcg_id, &pgdat, token, workingset); memcg = mem_cgroup_from_id(memcg_id); *lruvec = mem_cgroup_lruvec(memcg, pgdat); max_seq = READ_ONCE((*lruvec)->lrugen.max_seq); max_seq &= EVICTION_MASK >> LRU_REFS_WIDTH; return abs_diff(max_seq, *token >> LRU_REFS_WIDTH) < MAX_NR_GENS; } static void lru_gen_refault(struct folio *folio, void *shadow) { bool recent; int hist, tier, refs; bool workingset; unsigned long token; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); rcu_read_lock(); recent = lru_gen_test_recent(shadow, &lruvec, &token, &workingset); if (lruvec != folio_lruvec(folio)) goto unlock; mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); if (!recent) goto unlock; lrugen = &lruvec->lrugen; hist = lru_hist_from_seq(READ_ONCE(lrugen->min_seq[type])); refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + 1; tier = lru_tier_from_refs(refs, workingset); atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); /* see folio_add_lru() where folio_set_active() will be called */ if (lru_gen_in_fault()) mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); if (workingset) { folio_set_workingset(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } else set_mask_bits(&folio->flags.f, LRU_REFS_MASK, (refs - 1UL) << LRU_REFS_PGOFF); unlock: rcu_read_unlock(); } #else /* !CONFIG_LRU_GEN */ static void *lru_gen_eviction(struct folio *folio) { return NULL; } static bool lru_gen_test_recent(void *shadow, struct lruvec **lruvec, unsigned long *token, bool *workingset) { return false; } static void lru_gen_refault(struct folio *folio, void *shadow) { } #endif /* CONFIG_LRU_GEN */ /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged * @nr_pages: the number of pages to count * * As in-memory pages are aged, non-resident pages need to be aged as * well, in order for the refault distances later on to be comparable * to the in-memory dimensions. This function allows reclaim and LRU * operations to drive the non-resident aging along in parallel. */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages) { /* * Reclaiming a cgroup means reclaiming all its children in a * round-robin fashion. That means that each cgroup has an LRU * order that is composed of the LRU orders of its child * cgroups; and every page has an LRU position not just in the * cgroup that owns it, but in all of that group's ancestors. * * So when the physical inactive list of a leaf cgroup ages, * the virtual inactive lists of all its parents, including * the root cgroup's, age as well. */ do { atomic_long_add(nr_pages, &lruvec->nonresident_age); } while ((lruvec = parent_lruvec(lruvec))); } /** * workingset_eviction - note the eviction of a folio from memory * @target_memcg: the cgroup that is causing the reclaim * @folio: the folio being evicted * * Return: a shadow entry to be stored in @folio->mapping->i_pages in place * of the evicted @folio so that a later refault can be detected. */ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) { struct pglist_data *pgdat = folio_pgdat(folio); unsigned long eviction; struct lruvec *lruvec; int memcgid; /* Folio is fully exclusive and pins folio's memory cgroup pointer */ VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (lru_gen_enabled()) return lru_gen_eviction(folio); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); eviction >>= bucket_order; workingset_age_nonresident(lruvec, folio_nr_pages(folio)); return pack_shadow(memcgid, pgdat, eviction, folio_test_workingset(folio)); } /** * workingset_test_recent - tests if the shadow entry is for a folio that was * recently evicted. Also fills in @workingset with the value unpacked from * shadow. * @shadow: the shadow entry to be tested. * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. * @flush: whether to flush cgroup rstat. * * Return: true if the shadow is for a recently evicted folio; false otherwise. */ bool workingset_test_recent(void *shadow, bool file, bool *workingset, bool flush) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; unsigned long workingset_size; unsigned long refault; int memcgid; struct pglist_data *pgdat; unsigned long eviction; if (lru_gen_enabled()) { bool recent; rcu_read_lock(); recent = lru_gen_test_recent(shadow, &eviction_lruvec, &eviction, workingset); rcu_read_unlock(); return recent; } rcu_read_lock(); unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset); eviction <<= bucket_order; /* * Look up the memcg associated with the stored ID. It might * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging * algorithm's job to shake out the minimum access frequency * for the active cache. * * XXX: On !CONFIG_MEMCG, this will always return NULL; it * would be better if the root_mem_cgroup existed in all * configurations instead. */ eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_tryget(eviction_memcg)) eviction_memcg = NULL; rcu_read_unlock(); if (!mem_cgroup_disabled() && !eviction_memcg) return false; /* * Flush stats (and potentially sleep) outside the RCU read section. * * Note that workingset_test_recent() itself might be called in RCU read * section (for e.g, in cachestat) - these callers need to skip flushing * stats (via the flush argument). * * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ if (flush) mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); /* * Calculate the refault distance * * The unsigned subtraction here gives an accurate distance * across nonresident_age overflows in most cases. There is a * special case: usually, shadow entries have a short lifetime * and are either refaulted or reclaimed along with the inode * before they get too old. But it is not impossible for the * nonresident_age to lap a shadow entry in the field, which * can then result in a false small refault distance, leading * to a false activation should this old entry actually * refault again. However, earlier kernels used to deactivate * unconditionally with *every* reclaim invocation for the * longest time, so the occasional inappropriate activation * leading to pressure on the active list is not a problem. */ refault_distance = (refault - eviction) & EVICTION_MASK; /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if * all the memory was available to the workingset. Whether * workingset competition needs to consider anon or not depends * on having free swap space. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); } if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); if (file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_ANON); } } mem_cgroup_put(eviction_memcg); return refault_distance <= workingset_size; } /** * workingset_refault - Evaluate the refault of a previously evicted folio. * @folio: The freshly allocated replacement folio. * @shadow: Shadow entry of the evicted folio. * * Calculates and evaluates the refault distance of the previously * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. */ void workingset_refault(struct folio *folio, void *shadow) { bool file = folio_is_file_lru(folio); struct pglist_data *pgdat; struct mem_cgroup *memcg; struct lruvec *lruvec; bool workingset; long nr; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (lru_gen_enabled()) { lru_gen_refault(folio, shadow); return; } /* * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order * during folio reclaim is being determined. * * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. Make sure the folio is * locked to guarantee folio_memcg() stability throughout. */ nr = folio_nr_pages(folio); memcg = folio_memcg(folio); pgdat = folio_pgdat(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); if (!workingset_test_recent(shadow, file, &workingset, true)) return; folio_set_active(folio); workingset_age_nonresident(lruvec, nr); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); /* Folio was active prior to eviction */ if (workingset) { folio_set_workingset(folio); /* * XXX: Move to folio_add_lru() when it supports new vs * putback */ lru_note_cost_refault(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } } /** * workingset_activation - note a page activation * @folio: Folio that is being activated. */ void workingset_activation(struct folio *folio) { /* * Filter non-memcg pages here, e.g. unmap can call * mark_page_accessed() on VDSO pages. */ if (mem_cgroup_disabled() || folio_memcg_charged(folio)) workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); } /* * Shadow entries reflect the share of the working set that does not * fit into memory, so their number depends on the access pattern of * the workload. In most cases, they will refault or get reclaimed * along with the inode, but a (malicious) workload that streams * through files with a total size several times that of available * memory, while preventing the inodes from being reclaimed, can * create excessive amounts of shadow nodes. To keep a lid on this, * track shadow nodes and reclaim them when they grow way past the * point where they would still be useful. */ struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { struct page *page = virt_to_page(node); /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. * * Avoid acquiring the list_lru lock when the nodes are * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ lockdep_assert_held(&node->array->xa_lock); if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { list_lru_add_obj(&shadow_nodes, &node->private_list); __inc_node_page_state(page, WORKINGSET_NODES); } } else { if (!list_empty(&node->private_list)) { list_lru_del_obj(&shadow_nodes, &node->private_list); __dec_node_page_state(page, WORKINGSET_NODES); } } } static unsigned long count_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long max_nodes; unsigned long nodes; unsigned long pages; nodes = list_lru_shrink_count(&shadow_nodes, sc); if (!nodes) return SHRINK_EMPTY; /* * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. * * The size of the active list converges toward 100% of * overall page cache as memory grows, with only a tiny * inactive list. Assume the total cache size for that. * * Nodes might be sparsely populated, with only one shadow * entry in the extreme case. Obviously, we cannot keep one * node for every eligible shadow entry, so compromise on a * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG if (sc->memcg) { struct lruvec *lruvec; int i; mem_cgroup_flush_stats_ratelimited(sc->memcg); lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); for (pages = 0, i = 0; i < NR_LRU_LISTS; i++) pages += lruvec_page_state_local(lruvec, NR_LRU_BASE + i); pages += lruvec_page_state_local( lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else #endif pages = node_present_pages(sc->nid); max_nodes = pages >> (XA_CHUNK_SHIFT - 3); if (nodes <= max_nodes) return 0; return nodes - max_nodes; } static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, void *arg) __must_hold(lru->lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); struct address_space *mapping; int ret; /* * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * &lru->lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the &lru->lock pins any * address_space that has nodes on the LRU. * * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the &lru->lock. */ mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { spin_unlock_irq(&lru->lock); ret = LRU_RETRY; goto out; } /* For page cache we need to hold i_lock */ if (mapping->host != NULL) { if (!spin_trylock(&mapping->host->i_lock)) { xa_unlock(&mapping->i_pages); spin_unlock_irq(&lru->lock); ret = LRU_RETRY; goto out; } } list_lru_isolate(lru, item); __dec_node_page_state(virt_to_page(node), WORKINGSET_NODES); spin_unlock(&lru->lock); /* * The nodes should only contain one or more shadow entries, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; xa_delete_node(node, workingset_update_node); __inc_lruvec_kmem_state(node, WORKINGSET_NODERECLAIM); out_invalid: xa_unlock_irq(&mapping->i_pages); if (mapping->host != NULL) { if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } ret = LRU_REMOVED_RETRY; out: cond_resched(); return ret; } static unsigned long scan_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { /* list_lru lock nests inside the IRQ-safe i_pages lock */ return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate, NULL); } /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. */ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { struct shrinker *workingset_shadow_shrinker; unsigned int timestamp_bits; unsigned int max_order; int ret = -ENOMEM; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* * Calculate the eviction bucket size to cover the longest * actionable refault distance, which is currently half of * memory (totalram_pages/2). However, memory hotplug may add * some more pages at runtime, so keep working with up to * double the initial memory by using totalram_pages as-is. */ timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; max_order = fls_long(totalram_pages() - 1); if (max_order > timestamp_bits) bucket_order = max_order - timestamp_bits; pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); workingset_shadow_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-shadow"); if (!workingset_shadow_shrinker) goto err; ret = list_lru_init_memcg_key(&shadow_nodes, workingset_shadow_shrinker, &shadow_nodes_key); if (ret) goto err_list_lru; workingset_shadow_shrinker->count_objects = count_shadow_nodes; workingset_shadow_shrinker->scan_objects = scan_shadow_nodes; /* ->count reports only fully expendable nodes */ workingset_shadow_shrinker->seeks = 0; shrinker_register(workingset_shadow_shrinker); return 0; err_list_lru: shrinker_free(workingset_shadow_shrinker); err: return ret; } module_init(workingset_init); |
| 3 2038 2040 5 4 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 | // SPDX-License-Identifier: GPL-2.0-only /* * * Authors: * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de> */ #include <linux/if_arp.h> #include <linux/module.h> #include <net/6lowpan.h> #include <net/addrconf.h> #include "6lowpan_i.h" int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { int i, ret; switch (lltype) { case LOWPAN_LLTYPE_IEEE802154: dev->addr_len = EUI64_ADDR_LEN; break; case LOWPAN_LLTYPE_BTLE: dev->addr_len = ETH_ALEN; break; } dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; lowpan_dev(dev)->lltype = lltype; spin_lock_init(&lowpan_dev(dev)->ctx.lock); for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) lowpan_dev(dev)->ctx.table[i].id = i; dev->ndisc_ops = &lowpan_ndisc_ops; ret = register_netdevice(dev); if (ret < 0) return ret; lowpan_dev_debugfs_init(dev); return ret; } EXPORT_SYMBOL(lowpan_register_netdevice); int lowpan_register_netdev(struct net_device *dev, enum lowpan_lltypes lltype) { int ret; rtnl_lock(); ret = lowpan_register_netdevice(dev, lltype); rtnl_unlock(); return ret; } EXPORT_SYMBOL(lowpan_register_netdev); void lowpan_unregister_netdevice(struct net_device *dev) { unregister_netdevice(dev); lowpan_dev_debugfs_exit(dev); } EXPORT_SYMBOL(lowpan_unregister_netdevice); void lowpan_unregister_netdev(struct net_device *dev) { rtnl_lock(); lowpan_unregister_netdevice(dev); rtnl_unlock(); } EXPORT_SYMBOL(lowpan_unregister_netdev); int addrconf_ifid_802154_6lowpan(u8 *eui, struct net_device *dev) { struct wpan_dev *wpan_dev = lowpan_802154_dev(dev)->wdev->ieee802154_ptr; /* Set short_addr autoconfiguration if short_addr is present only */ if (!lowpan_802154_is_valid_src_short_addr(wpan_dev->short_addr)) return -1; /* For either address format, all zero addresses MUST NOT be used */ if (wpan_dev->pan_id == cpu_to_le16(0x0000) && wpan_dev->short_addr == cpu_to_le16(0x0000)) return -1; /* Alternatively, if no PAN ID is known, 16 zero bits may be used */ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)) memset(eui, 0, 2); else ieee802154_le16_to_be16(eui, &wpan_dev->pan_id); /* The "Universal/Local" (U/L) bit shall be set to zero */ eui[0] &= ~2; eui[2] = 0; eui[3] = 0xFF; eui[4] = 0xFE; eui[5] = 0; ieee802154_le16_to_be16(&eui[6], &wpan_dev->short_addr); return 0; } static int lowpan_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev; struct in6_addr addr; int i; if (dev->type != ARPHRD_6LOWPAN) return NOTIFY_DONE; idev = __in6_dev_get(dev); if (!idev) return NOTIFY_DONE; switch (event) { case NETDEV_UP: case NETDEV_CHANGE: /* (802.15.4 6LoWPAN short address slaac handling */ if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154) && addrconf_ifid_802154_6lowpan(addr.s6_addr + 8, dev) == 0) { __ipv6_addr_set_half(&addr.s6_addr32[0], htonl(0xFE800000), 0); addrconf_add_linklocal(idev, &addr, 0); } break; case NETDEV_DOWN: for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &lowpan_dev(dev)->ctx.table[i].flags); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block lowpan_notifier = { .notifier_call = lowpan_event, }; static int __init lowpan_module_init(void) { int ret; lowpan_debugfs_init(); ret = register_netdevice_notifier(&lowpan_notifier); if (ret < 0) { lowpan_debugfs_exit(); return ret; } request_module_nowait("nhc_dest"); request_module_nowait("nhc_fragment"); request_module_nowait("nhc_hop"); request_module_nowait("nhc_ipv6"); request_module_nowait("nhc_mobility"); request_module_nowait("nhc_routing"); request_module_nowait("nhc_udp"); return 0; } static void __exit lowpan_module_exit(void) { lowpan_debugfs_exit(); unregister_netdevice_notifier(&lowpan_notifier); } module_init(lowpan_module_init); module_exit(lowpan_module_exit); MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module"); MODULE_LICENSE("GPL"); |
| 6 14 14 14 14 9 3 2 6 2 14 14 6 6 6 31 31 7 12 10 6 14 9 9 9 5 4 2 2 1 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 | // SPDX-License-Identifier: GPL-2.0-or-later /* * dir.c - Operations for configfs directories. * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. */ #undef DEBUG #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/configfs.h> #include "configfs_internal.h" /* * Protects mutations of configfs_dirent linkage together with proper i_mutex * Also protects mutations of symlinks linkage to target configfs_dirent * Mutators of configfs_dirent linkage must *both* have the proper inode locked * and configfs_dirent_lock locked, in that order. * This allows one to safely traverse configfs_dirent trees and symlinks without * having to lock inodes. * * Protects setting of CONFIGFS_USET_DROPPING: checking the flag * unlocked is not reliable unless in detach_groups() called from * rmdir()/unregister() and from configfs_attach_group() */ DEFINE_SPINLOCK(configfs_dirent_lock); /* * All of link_obj/unlink_obj/link_group/unlink_group require that * subsys->su_mutex is held. * But parent configfs_subsystem is NULL when config_item is root. * Use this mutex when config_item is root. */ static DEFINE_MUTEX(configfs_subsystem_mutex); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); /* * Set sd->s_dentry to null only when this dentry is the one * that is going to be killed. Otherwise configfs_d_iput may * run just after configfs_lookup and set sd->s_dentry to * NULL even it's still in use. */ if (sd->s_dentry == dentry) sd->s_dentry = NULL; spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); } const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, }; #ifdef CONFIG_LOCKDEP /* * Helpers to make lockdep happy with our recursive locking of default groups' * inodes (see configfs_attach_group() and configfs_detach_group()). * We put default groups i_mutexes in separate classes according to their depth * from the youngest non-default group ancestor. * * For a non-default group A having default groups A/B, A/C, and A/C/D, default * groups A/B and A/C will have their inode's mutex in class * default_group_class[0], and default group A/C/D will be in * default_group_class[1]. * * The lock classes are declared and assigned in inode.c, according to the * s_depth value. * The s_depth value is initialized to -1, adjusted to >= 0 when attaching * default groups, and reset to -1 when all default groups are attached. During * attachment, if configfs_create() sees s_depth > 0, the lock class of the new * inode's mutex is set to default_group_class[s_depth - 1]. */ static void configfs_init_dirent_depth(struct configfs_dirent *sd) { sd->s_depth = -1; } static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, struct configfs_dirent *sd) { int parent_depth = parent_sd->s_depth; if (parent_depth >= 0) sd->s_depth = parent_depth + 1; } static void configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) { /* * item's i_mutex class is already setup, so s_depth is now only * used to set new sub-directories s_depth, which is always done * with item's i_mutex locked. */ /* * sd->s_depth == -1 iff we are a non default group. * else (we are a default group) sd->s_depth > 0 (see * create_dir()). */ if (sd->s_depth == -1) /* * We are a non default group and we are going to create * default groups. */ sd->s_depth = 0; } static void configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) { /* We will not create default groups anymore. */ sd->s_depth = -1; } #else /* CONFIG_LOCKDEP */ static void configfs_init_dirent_depth(struct configfs_dirent *sd) { } static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, struct configfs_dirent *sd) { } static void configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) { } static void configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) { } #endif /* CONFIG_LOCKDEP */ static struct configfs_fragment *new_fragment(void) { struct configfs_fragment *p; p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL); if (p) { atomic_set(&p->frag_count, 1); init_rwsem(&p->frag_sem); p->frag_dead = false; } return p; } void put_fragment(struct configfs_fragment *frag) { if (frag && atomic_dec_and_test(&frag->frag_count)) kfree(frag); } struct configfs_fragment *get_fragment(struct configfs_fragment *frag) { if (likely(frag)) atomic_inc(&frag->frag_count); return frag; } /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, void *element, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL); if (!sd) return ERR_PTR(-ENOMEM); atomic_set(&sd->s_count, 1); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; sd->s_type = type; configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); kmem_cache_free(configfs_dir_cachep, sd); return ERR_PTR(-ENOENT); } sd->s_frag = get_fragment(frag); /* * configfs_lookup scans only for unpinned items. s_children is * partitioned so that configfs_lookup can bail out early. * CONFIGFS_PINNED and CONFIGFS_NOT_PINNED are not symmetrical. readdir * cursors still need to be inserted at the front of the list. */ if (sd->s_type & CONFIGFS_PINNED) list_add_tail(&sd->s_sibling, &parent_sd->s_children); else list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); return sd; } /* * * Return -EEXIST if there is already a configfs element with the same * name for the same parent. * * called with parent inode's i_mutex held */ static int configfs_dirent_exists(struct dentry *dentry) { struct configfs_dirent *parent_sd = dentry->d_parent->d_fsdata; const unsigned char *new = dentry->d_name.name; struct configfs_dirent *sd; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_element) { const unsigned char *existing = configfs_get_name(sd); if (strcmp(existing, new)) continue; else return -EEXIST; } } return 0; } int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, umode_t mode, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; sd = configfs_new_dirent(parent_sd, element, type, frag); if (IS_ERR(sd)) return PTR_ERR(sd); sd->s_mode = mode; sd->s_dentry = dentry; if (dentry) dentry->d_fsdata = configfs_get(sd); return 0; } static void configfs_remove_dirent(struct dentry *dentry) { struct configfs_dirent *sd = dentry->d_fsdata; if (!sd) return; spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); spin_unlock(&configfs_dirent_lock); configfs_put(sd); } /** * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. * @frag: config_item's fragment. * * Note: user-created entries won't be allowed under this new directory * until it is validated by configfs_dir_set_ready() */ static int configfs_create_dir(struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; struct dentry *p = dentry->d_parent; struct inode *inode; BUG_ON(!item); error = configfs_make_dirent(p->d_fsdata, dentry, item, mode, CONFIGFS_DIR | CONFIGFS_USET_CREATING, frag); if (unlikely(error)) return error; configfs_set_dir_dirent_depth(p->d_fsdata, dentry->d_fsdata); inode = configfs_create(dentry, mode); if (IS_ERR(inode)) goto out_remove; inode->i_op = &configfs_dir_inode_operations; inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); /* already hashed */ dget(dentry); /* pin directory dentries in core */ inc_nlink(d_inode(p)); item->ci_dentry = dentry; return 0; out_remove: configfs_put(dentry->d_fsdata); configfs_remove_dirent(dentry); return PTR_ERR(inode); } /* * Allow userspace to create new entries under a new directory created with * configfs_create_dir(), and under all of its chidlren directories recursively. * @sd configfs_dirent of the new directory to validate * * Caller must hold configfs_dirent_lock. */ static void configfs_dir_set_ready(struct configfs_dirent *sd) { struct configfs_dirent *child_sd; sd->s_type &= ~CONFIGFS_USET_CREATING; list_for_each_entry(child_sd, &sd->s_children, s_sibling) if (child_sd->s_type & CONFIGFS_USET_CREATING) configfs_dir_set_ready(child_sd); } /* * Check that a directory does not belong to a directory hierarchy being * attached and not validated yet. * @sd configfs_dirent of the directory to check * * @return non-zero iff the directory was validated * * Note: takes configfs_dirent_lock, so the result may change from false to true * in two consecutive calls, but never from true to false. */ int configfs_dirent_is_ready(struct configfs_dirent *sd) { int ret; spin_lock(&configfs_dirent_lock); ret = !(sd->s_type & CONFIGFS_USET_CREATING); spin_unlock(&configfs_dirent_lock); return ret; } int configfs_create_link(struct configfs_dirent *target, struct dentry *parent, struct dentry *dentry, char *body) { int err = 0; umode_t mode = S_IFLNK | S_IRWXUGO; struct configfs_dirent *p = parent->d_fsdata; struct inode *inode; err = configfs_make_dirent(p, dentry, target, mode, CONFIGFS_ITEM_LINK, p->s_frag); if (err) return err; inode = configfs_create(dentry, mode); if (IS_ERR(inode)) goto out_remove; inode->i_link = body; inode->i_op = &configfs_symlink_inode_operations; d_instantiate(dentry, inode); dget(dentry); /* pin link dentries in core */ return 0; out_remove: configfs_put(dentry->d_fsdata); configfs_remove_dirent(dentry); return PTR_ERR(inode); } static void remove_dir(struct dentry * d) { struct dentry * parent = dget(d->d_parent); configfs_remove_dirent(d); if (d_really_is_positive(d)) simple_rmdir(d_inode(parent),d); pr_debug(" o %pd removing done (%d)\n", d, d_count(d)); dput(parent); } /** * configfs_remove_dir - remove an config_item's directory. * @item: config_item we're removing. * * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be configfs_rmdir() below, instead of calling separately. * * Caller holds the mutex of the item's inode */ static void configfs_remove_dir(struct config_item * item) { struct dentry * dentry = dget(item->ci_dentry); if (!dentry) return; remove_dir(dentry); /** * Drop reference from dget() on entrance. */ dput(dentry); } static struct dentry * configfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; struct inode *inode = NULL; if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached * * This forbids userspace to read/write attributes of items which may * not complete their initialization, since the dentries of the * attributes won't be instantiated. */ if (!configfs_dirent_is_ready(parent_sd)) return ERR_PTR(-ENOENT); spin_lock(&configfs_dirent_lock); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { /* * s_children is partitioned, see configfs_new_dirent. The first * pinned item indicates we can stop scanning. */ if (sd->s_type & CONFIGFS_PINNED) break; /* * Note: CONFIGFS_PINNED and CONFIGFS_NOT_PINNED are asymmetric. * there may be a readdir cursor in this list */ if ((sd->s_type & CONFIGFS_NOT_PINNED) && !strcmp(configfs_get_name(sd), dentry->d_name.name)) { struct configfs_attribute *attr = sd->s_element; umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; spin_unlock(&configfs_dirent_lock); inode = configfs_create(dentry, mode); if (IS_ERR(inode)) { configfs_put(sd); return ERR_CAST(inode); } if (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) { inode->i_size = 0; inode->i_fop = &configfs_bin_file_operations; } else { inode->i_size = PAGE_SIZE; inode->i_fop = &configfs_file_operations; } goto done; } } spin_unlock(&configfs_dirent_lock); done: d_add(dentry, inode); return NULL; } /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are * attributes and are removed by rmdir(). We recurse, setting * CONFIGFS_USET_DROPPING on all children that are candidates for * default detach. * If there is an error, the caller will reset the flags via * configfs_detach_rollback(). */ static int configfs_detach_prep(struct dentry *dentry, struct dentry **wait) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; int ret; /* Mark that we're trying to drop the group */ parent_sd->s_type |= CONFIGFS_USET_DROPPING; ret = -EBUSY; if (parent_sd->s_links) goto out; ret = 0; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element || (sd->s_type & CONFIGFS_NOT_PINNED)) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { /* Abort if racing with mkdir() */ if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { if (wait) *wait= dget(sd->s_dentry); return -EAGAIN; } /* * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ ret = configfs_detach_prep(sd->s_dentry, wait); if (!ret) continue; } else ret = -ENOTEMPTY; break; } out: return ret; } /* * Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was * set. */ static void configfs_detach_rollback(struct dentry *dentry) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; parent_sd->s_type &= ~CONFIGFS_USET_DROPPING; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) if (sd->s_type & CONFIGFS_USET_DEFAULT) configfs_detach_rollback(sd->s_dentry); } static void detach_attrs(struct config_item * item) { struct dentry * dentry = dget(item->ci_dentry); struct configfs_dirent * parent_sd; struct configfs_dirent * sd, * tmp; if (!dentry) return; pr_debug("configfs %s: dropping attrs for dir\n", dentry->d_name.name); parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED)) continue; spin_lock(&configfs_dirent_lock); list_del_init(&sd->s_sibling); spin_unlock(&configfs_dirent_lock); configfs_drop_dentry(sd, dentry); configfs_put(sd); } /** * Drop reference from dget() on entrance. */ dput(dentry); } static int populate_attrs(struct config_item *item) { const struct config_item_type *t = item->ci_type; struct configfs_group_operations *ops; struct configfs_attribute *attr; struct configfs_bin_attribute *bin_attr; int error = 0; int i; if (!t) return -EINVAL; ops = t->ct_group_ops; if (t->ct_attrs) { for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) { if (ops && ops->is_visible && !ops->is_visible(item, attr, i)) continue; if ((error = configfs_create_file(item, attr))) break; } } if (!error && t->ct_bin_attrs) { for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) continue; error = configfs_create_bin_file(item, bin_attr); if (error) break; } } if (error) detach_attrs(item); return error; } static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag); static void configfs_detach_group(struct config_item *item); static void detach_groups(struct config_group *group) { struct dentry * dentry = dget(group->cg_item.ci_dentry); struct dentry *child; struct configfs_dirent *parent_sd; struct configfs_dirent *sd, *tmp; if (!dentry) return; parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & CONFIGFS_USET_DEFAULT)) continue; child = sd->s_dentry; inode_lock(d_inode(child)); configfs_detach_group(sd->s_element); d_inode(child)->i_flags |= S_DEAD; dont_mount(child); inode_unlock(d_inode(child)); d_delete(child); dput(child); } /** * Drop reference from dget() on entrance. */ dput(dentry); } /* * This fakes mkdir(2) on a default_groups[] entry. It * creates a dentry, attachs it, and then does fixup * on the sd->s_type. * * We could, perhaps, tweak our parent's ->mkdir for a minute and * try using vfs_mkdir. Just a thought. */ static int create_default_group(struct config_group *parent_group, struct config_group *group, struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; /* We trust the caller holds a reference to parent */ struct dentry *child, *parent = parent_group->cg_item.ci_dentry; if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; ret = -ENOMEM; child = d_alloc_name(parent, group->cg_item.ci_name); if (child) { d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, &group->cg_item, child, frag); if (!ret) { sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; } else { BUG_ON(d_inode(child)); d_drop(child); dput(child); } } return ret; } static int populate_groups(struct config_group *group, struct configfs_fragment *frag) { struct config_group *new_group; int ret = 0; list_for_each_entry(new_group, &group->default_groups, group_entry) { ret = create_default_group(group, new_group, frag); if (ret) { detach_groups(group); break; } } return ret; } void configfs_remove_default_groups(struct config_group *group) { struct config_group *g, *n; list_for_each_entry_safe(g, n, &group->default_groups, group_entry) { list_del(&g->group_entry); config_item_put(&g->cg_item); } } EXPORT_SYMBOL(configfs_remove_default_groups); /* * All of link_obj/unlink_obj/link_group/unlink_group require that * subsys->su_mutex is held. */ static void unlink_obj(struct config_item *item) { struct config_group *group; group = item->ci_group; if (group) { list_del_init(&item->ci_entry); item->ci_group = NULL; item->ci_parent = NULL; /* Drop the reference for ci_entry */ config_item_put(item); /* Drop the reference for ci_parent */ config_group_put(group); } } static void link_obj(struct config_item *parent_item, struct config_item *item) { /* * Parent seems redundant with group, but it makes certain * traversals much nicer. */ item->ci_parent = parent_item; /* * We hold a reference on the parent for the child's ci_parent * link. */ item->ci_group = config_group_get(to_config_group(parent_item)); list_add_tail(&item->ci_entry, &item->ci_group->cg_children); /* * We hold a reference on the child for ci_entry on the parent's * cg_children */ config_item_get(item); } static void unlink_group(struct config_group *group) { struct config_group *new_group; list_for_each_entry(new_group, &group->default_groups, group_entry) unlink_group(new_group); group->cg_subsys = NULL; unlink_obj(&group->cg_item); } static void link_group(struct config_group *parent_group, struct config_group *group) { struct config_group *new_group; struct configfs_subsystem *subsys = NULL; /* gcc is a turd */ link_obj(&parent_group->cg_item, &group->cg_item); if (parent_group->cg_subsys) subsys = parent_group->cg_subsys; else if (configfs_is_root(&parent_group->cg_item)) subsys = to_configfs_subsystem(group); else BUG(); group->cg_subsys = subsys; list_for_each_entry(new_group, &group->default_groups, group_entry) link_group(group, new_group); } /* * The goal is that configfs_attach_item() (and * configfs_attach_group()) can be called from either the VFS or this * module. That is, they assume that the items have been created, * the dentry allocated, and the dcache is all ready to go. * * If they fail, they must clean up after themselves as if they * had never been called. The caller (VFS or local function) will * handle cleaning up the dcache bits. * * configfs_detach_group() and configfs_detach_item() behave similarly on * the way out. They assume that the proper semaphores are held, they * clean up the configfs items, and they expect their callers will * handle the dcache bits. */ static int configfs_attach_item(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int ret; ret = configfs_create_dir(item, dentry, frag); if (!ret) { ret = populate_attrs(item); if (ret) { /* * We are going to remove an inode and its dentry but * the VFS may already have hit and used them. Thus, * we must lock them as rmdir() would. */ inode_lock(d_inode(dentry)); configfs_remove_dir(item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); inode_unlock(d_inode(dentry)); d_delete(dentry); } } return ret; } /* Caller holds the mutex of the item's inode */ static void configfs_detach_item(struct config_item *item) { detach_attrs(item); configfs_remove_dir(item); } static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, struct dentry *dentry, struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; ret = configfs_attach_item(parent_item, item, dentry, frag); if (!ret) { sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; /* * FYI, we're faking mkdir in populate_groups() * We must lock the group's inode to avoid races with the VFS * which can already hit the inode and try to add/remove entries * under it. * * We must also lock the inode to remove it safely in case of * error, as rmdir() would. */ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); configfs_adjust_dir_dirent_depth_before_populate(sd); ret = populate_groups(to_config_group(item), frag); if (ret) { configfs_detach_item(item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); } configfs_adjust_dir_dirent_depth_after_populate(sd); inode_unlock(d_inode(dentry)); if (ret) d_delete(dentry); } return ret; } /* Caller holds the mutex of the group's inode */ static void configfs_detach_group(struct config_item *item) { detach_groups(to_config_group(item)); configfs_detach_item(item); } /* * After the item has been detached from the filesystem view, we are * ready to tear it out of the hierarchy. Notify the client before * we do that so they can perform any cleanup that requires * navigating the hierarchy. A client does not need to provide this * callback. The subsystem semaphore MUST be held by the caller, and * references must be valid for both items. It also assumes the * caller has validated ci_type. */ static void client_disconnect_notify(struct config_item *parent_item, struct config_item *item) { const struct config_item_type *type; type = parent_item->ci_type; BUG_ON(!type); if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) type->ct_group_ops->disconnect_notify(to_config_group(parent_item), item); } /* * Drop the initial reference from make_item()/make_group() * This function assumes that reference is held on item * and that item holds a valid reference to the parent. Also, it * assumes the caller has validated ci_type. */ static void client_drop_item(struct config_item *parent_item, struct config_item *item) { const struct config_item_type *type; type = parent_item->ci_type; BUG_ON(!type); /* * If ->drop_item() exists, it is responsible for the * config_item_put(). */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), item); else config_item_put(item); } #ifdef DEBUG static void configfs_dump_one(struct configfs_dirent *sd, int level) { pr_info("%*s\"%s\":\n", level, " ", configfs_get_name(sd)); #define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type) type_print(CONFIGFS_ROOT); type_print(CONFIGFS_DIR); type_print(CONFIGFS_ITEM_ATTR); type_print(CONFIGFS_ITEM_LINK); type_print(CONFIGFS_USET_DIR); type_print(CONFIGFS_USET_DEFAULT); type_print(CONFIGFS_USET_DROPPING); #undef type_print } static int configfs_dump(struct configfs_dirent *sd, int level) { struct configfs_dirent *child_sd; int ret = 0; configfs_dump_one(sd, level); if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) return 0; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { ret = configfs_dump(child_sd, level + 2); if (ret) break; } return ret; } #endif /* * configfs_depend_item() and configfs_undepend_item() * * WARNING: Do not call these from a configfs callback! * * This describes these functions and their helpers. * * Allow another kernel system to depend on a config_item. If this * happens, the item cannot go away until the dependent can live without * it. The idea is to give client modules as simple an interface as * possible. When a system asks them to depend on an item, they just * call configfs_depend_item(). If the item is live and the client * driver is in good shape, we'll happily do the work for them. * * Why is the locking complex? Because configfs uses the VFS to handle * all locking, but this function is called outside the normal * VFS->configfs path. So it must take VFS locks to prevent the * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is * why you can't call these functions underneath configfs callbacks. * * Note, btw, that this can be called at *any* time, even when a configfs * subsystem isn't registered, or when configfs is loading or unloading. * Just like configfs_register_subsystem(). So we take the same * precautions. We pin the filesystem. We lock configfs_dirent_lock. * If we can find the target item in the * configfs tree, it must be part of the subsystem tree as well, so we * do not need the subsystem semaphore. Holding configfs_dirent_lock helps * locking out mkdir() and rmdir(), who might be racing us. */ /* * configfs_depend_prep() * * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are * attributes. This is similar but not the same to configfs_detach_prep(). * Note that configfs_detach_prep() expects the parent to be locked when it * is called, but we lock the parent *inside* configfs_depend_prep(). We * do that so we can unlock it if we find nothing. * * Here we do a depth-first search of the dentry hierarchy looking for * our object. * We deliberately ignore items tagged as dropping since they are virtually * dead, as well as items in the middle of attachment since they virtually * do not exist yet. This completes the locking out of racing mkdir() and * rmdir(). * Note: subdirectories in the middle of attachment start with s_type = * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of * s_type is in configfs_new_dirent(), which has configfs_dirent_lock. * * If the target is not found, -ENOENT is bubbled up. * * This adds a requirement that all config_items be unique! * * This is recursive. There isn't * much on the stack, though, so folks that need this function - be careful * about your stack! Patches will be accepted to make it iterative. */ static int configfs_depend_prep(struct dentry *origin, struct config_item *target) { struct configfs_dirent *child_sd, *sd; int ret = 0; BUG_ON(!origin || !origin->d_fsdata); sd = origin->d_fsdata; if (sd->s_element == target) /* Boo-yah */ goto out; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { if ((child_sd->s_type & CONFIGFS_DIR) && !(child_sd->s_type & CONFIGFS_USET_DROPPING) && !(child_sd->s_type & CONFIGFS_USET_CREATING)) { ret = configfs_depend_prep(child_sd->s_dentry, target); if (!ret) goto out; /* Child path boo-yah */ } } /* We looped all our children and didn't find target */ ret = -ENOENT; out: return ret; } static int configfs_do_depend_item(struct dentry *subsys_dentry, struct config_item *target) { struct configfs_dirent *p; int ret; spin_lock(&configfs_dirent_lock); /* Scan the tree, return 0 if found */ ret = configfs_depend_prep(subsys_dentry, target); if (ret) goto out_unlock_dirent_lock; /* * We are sure that the item is not about to be removed by rmdir(), and * not in the middle of attachment by mkdir(). */ p = target->ci_dentry->d_fsdata; p->s_dependent_count += 1; out_unlock_dirent_lock: spin_unlock(&configfs_dirent_lock); return ret; } static inline struct configfs_dirent * configfs_find_subsys_dentry(struct configfs_dirent *root_sd, struct config_item *subsys_item) { struct configfs_dirent *p; struct configfs_dirent *ret = NULL; list_for_each_entry(p, &root_sd->s_children, s_sibling) { if (p->s_type & CONFIGFS_DIR && p->s_element == subsys_item) { ret = p; break; } } return ret; } int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target) { int ret; struct configfs_dirent *subsys_sd; struct config_item *s_item = &subsys->su_group.cg_item; struct dentry *root; /* * Pin the configfs filesystem. This means we can safely access * the root of the configfs filesystem. */ root = configfs_pin_fs(); if (IS_ERR(root)) return PTR_ERR(root); /* * Next, lock the root directory. We're going to check that the * subsystem is really registered, and so we need to lock out * configfs_[un]register_subsystem(). */ inode_lock(d_inode(root)); subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item); if (!subsys_sd) { ret = -ENOENT; goto out_unlock_fs; } /* Ok, now we can trust subsys/s_item */ ret = configfs_do_depend_item(subsys_sd->s_dentry, target); out_unlock_fs: inode_unlock(d_inode(root)); /* * If we succeeded, the fs is pinned via other methods. If not, * we're done with it anyway. So release_fs() is always right. */ configfs_release_fs(); return ret; } EXPORT_SYMBOL(configfs_depend_item); /* * Release the dependent linkage. This is much simpler than * configfs_depend_item() because we know that the client driver is * pinned, thus the subsystem is pinned, and therefore configfs is pinned. */ void configfs_undepend_item(struct config_item *target) { struct configfs_dirent *sd; /* * Since we can trust everything is pinned, we just need * configfs_dirent_lock. */ spin_lock(&configfs_dirent_lock); sd = target->ci_dentry->d_fsdata; BUG_ON(sd->s_dependent_count < 1); sd->s_dependent_count -= 1; /* * After this unlock, we cannot trust the item to stay alive! * DO NOT REFERENCE item after this unlock. */ spin_unlock(&configfs_dirent_lock); } EXPORT_SYMBOL(configfs_undepend_item); /* * caller_subsys is a caller's subsystem not target's. This is used to * determine if we should lock root and check subsys or not. When we are * in the same subsystem as our target there is no need to do locking as * we know that subsys is valid and is not unregistered during this function * as we are called from callback of one of his children and VFS holds a lock * on some inode. Otherwise we have to lock our root to ensure that target's * subsystem it is not unregistered during this function. */ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys, struct config_item *target) { struct configfs_subsystem *target_subsys; struct config_group *root, *parent; struct configfs_dirent *subsys_sd; int ret = -ENOENT; /* Disallow this function for configfs root */ if (configfs_is_root(target)) return -EINVAL; parent = target->ci_group; /* * This may happen when someone is trying to depend root * directory of some subsystem */ if (configfs_is_root(&parent->cg_item)) { target_subsys = to_configfs_subsystem(to_config_group(target)); root = parent; } else { target_subsys = parent->cg_subsys; /* Find a cofnigfs root as we may need it for locking */ for (root = parent; !configfs_is_root(&root->cg_item); root = root->cg_item.ci_group) ; } if (target_subsys != caller_subsys) { /* * We are in other configfs subsystem, so we have to do * additional locking to prevent other subsystem from being * unregistered */ inode_lock(d_inode(root->cg_item.ci_dentry)); /* * As we are trying to depend item from other subsystem * we have to check if this subsystem is still registered */ subsys_sd = configfs_find_subsys_dentry( root->cg_item.ci_dentry->d_fsdata, &target_subsys->su_group.cg_item); if (!subsys_sd) goto out_root_unlock; } else { subsys_sd = target_subsys->su_group.cg_item.ci_dentry->d_fsdata; } /* Now we can execute core of depend item */ ret = configfs_do_depend_item(subsys_sd->s_dentry, target); if (target_subsys != caller_subsys) out_root_unlock: /* * We were called from subsystem other than our target so we * took some locks so now it's time to release them */ inode_unlock(d_inode(root->cg_item.ci_dentry)); return ret; } EXPORT_SYMBOL(configfs_depend_item_unlocked); static struct dentry *configfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int ret = 0; int module_got = 0; struct config_group *group = NULL; struct config_item *item = NULL; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; const struct config_item_type *type; struct module *subsys_owner = NULL, *new_item_owner = NULL; struct configfs_fragment *frag; char *name; sd = dentry->d_parent->d_fsdata; /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached */ if (!configfs_dirent_is_ready(sd)) { ret = -ENOENT; goto out; } if (!(sd->s_type & CONFIGFS_USET_DIR)) { ret = -EPERM; goto out; } frag = new_fragment(); if (!frag) { ret = -ENOMEM; goto out; } /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); if (!type || !type->ct_group_ops || (!type->ct_group_ops->make_group && !type->ct_group_ops->make_item)) { ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ goto out_put; } /* * The subsystem may belong to a different module than the item * being created. We don't want to safely pin the new item but * fail to pin the subsystem it sits under. */ if (!subsys->su_group.cg_item.ci_type) { ret = -EINVAL; goto out_put; } subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; if (!try_module_get(subsys_owner)) { ret = -EINVAL; goto out_put; } name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto out_subsys_put; } snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); mutex_lock(&subsys->su_mutex); if (type->ct_group_ops->make_group) { group = type->ct_group_ops->make_group(to_config_group(parent_item), name); if (!group) group = ERR_PTR(-ENOMEM); if (!IS_ERR(group)) { link_group(to_config_group(parent_item), group); item = &group->cg_item; } else ret = PTR_ERR(group); } else { item = type->ct_group_ops->make_item(to_config_group(parent_item), name); if (!item) item = ERR_PTR(-ENOMEM); if (!IS_ERR(item)) link_obj(parent_item, item); else ret = PTR_ERR(item); } mutex_unlock(&subsys->su_mutex); kfree(name); if (ret) { /* * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ goto out_subsys_put; } /* * link_obj() has been called (via link_group() for groups). * From here on out, errors must clean that up. */ type = item->ci_type; if (!type) { ret = -EINVAL; goto out_unlink; } new_item_owner = type->ct_owner; if (!try_module_get(new_item_owner)) { ret = -EINVAL; goto out_unlink; } /* * I hate doing it this way, but if there is * an error, module_put() probably should * happen after any cleanup. */ module_got = 1; /* * Make racing rmdir() fail if it did not tag parent with * CONFIGFS_USET_DROPPING * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will * fail and let rmdir() terminate correctly */ spin_lock(&configfs_dirent_lock); /* This will make configfs_detach_prep() fail */ sd->s_type |= CONFIGFS_USET_IN_MKDIR; spin_unlock(&configfs_dirent_lock); if (group) ret = configfs_attach_group(parent_item, item, dentry, frag); else ret = configfs_attach_item(parent_item, item, dentry, frag); spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; if (!ret) configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); out_unlink: if (ret) { /* Tear down everything we built up */ mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); if (group) unlink_group(group); else unlink_obj(item); client_drop_item(parent_item, item); mutex_unlock(&subsys->su_mutex); if (module_got) module_put(new_item_owner); } out_subsys_put: if (ret) module_put(subsys_owner); out_put: /* * link_obj()/link_group() took a reference from child->parent, * so the parent is safely pinned. We can drop our working * reference. */ config_item_put(parent_item); put_fragment(frag); out: return ERR_PTR(ret); } static int configfs_rmdir(struct inode *dir, struct dentry *dentry) { struct config_item *parent_item; struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct configfs_fragment *frag; struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); if (!parent_item->ci_type) { config_item_put(parent_item); return -EINVAL; } /* configfs_mkdir() shouldn't have allowed this */ BUG_ON(!subsys->su_group.cg_item.ci_type); subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; /* * Ensure that no racing symlink() will make detach_prep() fail while * the new link is temporarily attached */ do { struct dentry *wait; mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); /* * Here's where we check for dependents. We're protected by * configfs_dirent_lock. * If no dependent, atomically tag the item as dropping. */ ret = sd->s_dependent_count ? -EBUSY : 0; if (!ret) { ret = configfs_detach_prep(dentry, &wait); if (ret) configfs_detach_rollback(dentry); } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); if (ret) { if (ret != -EAGAIN) { config_item_put(parent_item); return ret; } /* Wait until the racing operation terminates */ inode_lock(d_inode(wait)); inode_unlock(d_inode(wait)); dput(wait); } } while (ret == -EAGAIN); frag = sd->s_frag; if (down_write_killable(&frag->frag_sem)) { spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; up_write(&frag->frag_sem); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); /* Drop reference from above, item already holds one. */ config_item_put(parent_item); if (item->ci_type) dead_item_owner = item->ci_type->ct_owner; if (sd->s_type & CONFIGFS_USET_DIR) { configfs_detach_group(item); mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); unlink_group(to_config_group(item)); } else { configfs_detach_item(item); mutex_lock(&subsys->su_mutex); client_disconnect_notify(parent_item, item); unlink_obj(item); } client_drop_item(parent_item, item); mutex_unlock(&subsys->su_mutex); /* Drop our reference from above */ config_item_put(item); module_put(dead_item_owner); module_put(subsys_owner); return 0; } const struct inode_operations configfs_dir_inode_operations = { .mkdir = configfs_mkdir, .rmdir = configfs_rmdir, .symlink = configfs_symlink, .unlink = configfs_unlink, .lookup = configfs_lookup, .setattr = configfs_setattr, }; const struct inode_operations configfs_root_inode_operations = { .lookup = configfs_lookup, .setattr = configfs_setattr, }; static int configfs_dir_open(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; int err; inode_lock(d_inode(dentry)); /* * Fake invisibility if dir belongs to a group/default groups hierarchy * being attached */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL); err = PTR_ERR_OR_ZERO(file->private_data); } inode_unlock(d_inode(dentry)); return err; } static int configfs_dir_close(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * cursor = file->private_data; inode_lock(d_inode(dentry)); spin_lock(&configfs_dirent_lock); list_del_init(&cursor->s_sibling); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(dentry)); release_configfs_dirent(cursor); return 0; } static int configfs_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct super_block *sb = dentry->d_sb; struct configfs_dirent * parent_sd = dentry->d_fsdata; struct configfs_dirent *cursor = file->private_data; struct list_head *p, *q = &cursor->s_sibling; ino_t ino = 0; if (!dir_emit_dots(file, ctx)) return 0; spin_lock(&configfs_dirent_lock); if (ctx->pos == 2) list_move(q, &parent_sd->s_children); for (p = q->next; p != &parent_sd->s_children; p = p->next) { struct configfs_dirent *next; const char *name; int len; struct inode *inode = NULL; next = list_entry(p, struct configfs_dirent, s_sibling); if (!next->s_element) continue; /* * We'll have a dentry and an inode for * PINNED items and for open attribute * files. We lock here to prevent a race * with configfs_d_iput() clearing * s_dentry before calling iput(). * * Why do we go to the trouble? If * someone has an attribute file open, * the inode number should match until * they close it. Beyond that, we don't * care. */ dentry = next->s_dentry; if (dentry) inode = d_inode(dentry); if (inode) ino = inode->i_ino; spin_unlock(&configfs_dirent_lock); if (!inode) ino = iunique(sb, 2); name = configfs_get_name(next); len = strlen(name); if (!dir_emit(ctx, name, len, ino, fs_umode_to_dtype(next->s_mode))) return 0; spin_lock(&configfs_dirent_lock); list_move(q, p); p = q; ctx->pos++; } spin_unlock(&configfs_dirent_lock); return 0; } static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry * dentry = file->f_path.dentry; switch (whence) { case 1: offset += file->f_pos; fallthrough; case 0: if (offset >= 0) break; fallthrough; default: return -EINVAL; } if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_dirent *cursor = file->private_data; struct list_head *p; loff_t n = file->f_pos - 2; spin_lock(&configfs_dirent_lock); list_del(&cursor->s_sibling); p = sd->s_children.next; while (n && p != &sd->s_children) { struct configfs_dirent *next; next = list_entry(p, struct configfs_dirent, s_sibling); if (next->s_element) n--; p = p->next; } list_add_tail(&cursor->s_sibling, p); spin_unlock(&configfs_dirent_lock); } } return offset; } const struct file_operations configfs_dir_operations = { .open = configfs_dir_open, .release = configfs_dir_close, .llseek = configfs_dir_lseek, .read = generic_read_dir, .iterate_shared = configfs_readdir, }; /** * configfs_register_group - creates a parent-child relation between two groups * @parent_group: parent group * @group: child group * * link groups, creates dentry for the child and attaches it to the * parent dentry. * * Return: 0 on success, negative errno code on error */ int configfs_register_group(struct config_group *parent_group, struct config_group *group) { struct configfs_subsystem *subsys = parent_group->cg_subsys; struct dentry *parent; struct configfs_fragment *frag; int ret; frag = new_fragment(); if (!frag) return -ENOMEM; mutex_lock(&subsys->su_mutex); link_group(parent_group, group); mutex_unlock(&subsys->su_mutex); parent = parent_group->cg_item.ci_dentry; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); ret = create_default_group(parent_group, group, frag); if (ret) goto err_out; spin_lock(&configfs_dirent_lock); configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(parent)); put_fragment(frag); return 0; err_out: inode_unlock(d_inode(parent)); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); put_fragment(frag); return ret; } EXPORT_SYMBOL(configfs_register_group); /** * configfs_unregister_group() - unregisters a child group from its parent * @group: parent group to be unregistered * * Undoes configfs_register_group() */ void configfs_unregister_group(struct config_group *group) { struct configfs_subsystem *subsys = group->cg_subsys; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_fragment *frag = sd->s_frag; down_write(&frag->frag_sem); frag->frag_dead = true; up_write(&frag->frag_sem); inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); spin_unlock(&configfs_dirent_lock); configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); inode_unlock(d_inode(parent)); dput(dentry); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); } EXPORT_SYMBOL(configfs_unregister_group); /** * configfs_register_default_group() - allocates and registers a child group * @parent_group: parent group * @name: child group name * @item_type: child item type description * * boilerplate to allocate and register a child group with its parent. We need * kzalloc'ed memory because child's default_group is initially empty. * * Return: allocated config group or ERR_PTR() on error */ struct config_group * configfs_register_default_group(struct config_group *parent_group, const char *name, const struct config_item_type *item_type) { int ret; struct config_group *group; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); config_group_init_type_name(group, name, item_type); ret = configfs_register_group(parent_group, group); if (ret) { kfree(group); return ERR_PTR(ret); } return group; } EXPORT_SYMBOL(configfs_register_default_group); /** * configfs_unregister_default_group() - unregisters and frees a child group * @group: the group to act on */ void configfs_unregister_default_group(struct config_group *group) { configfs_unregister_group(group); kfree(group); } EXPORT_SYMBOL(configfs_unregister_default_group); int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; struct config_group *group = &subsys->su_group; struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; struct configfs_fragment *frag; frag = new_fragment(); if (!frag) return -ENOMEM; root = configfs_pin_fs(); if (IS_ERR(root)) { put_fragment(frag); return PTR_ERR(root); } if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; mutex_lock(&configfs_subsystem_mutex); link_group(to_config_group(sd->s_element), group); mutex_unlock(&configfs_subsystem_mutex); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); err = -ENOMEM; dentry = d_alloc_name(root, group->cg_item.ci_name); if (dentry) { d_add(dentry, NULL); err = configfs_dirent_exists(dentry); if (!err) err = configfs_attach_group(sd->s_element, &group->cg_item, dentry, frag); if (err) { BUG_ON(d_inode(dentry)); d_drop(dentry); dput(dentry); } else { spin_lock(&configfs_dirent_lock); configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); } } inode_unlock(d_inode(root)); if (err) { mutex_lock(&configfs_subsystem_mutex); unlink_group(group); mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } put_fragment(frag); return err; } void configfs_unregister_subsystem(struct configfs_subsystem *subsys) { struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *root = dentry->d_sb->s_root; struct configfs_dirent *sd = dentry->d_fsdata; struct configfs_fragment *frag = sd->s_frag; if (dentry->d_parent != root) { pr_err("Tried to unregister non-subsystem!\n"); return; } down_write(&frag->frag_sem); frag->frag_dead = true; up_write(&frag->frag_sem); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry, NULL)) { pr_err("Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); inode_unlock(d_inode(dentry)); d_drop(dentry); fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); dput(dentry); mutex_lock(&configfs_subsystem_mutex); unlink_group(group); mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } EXPORT_SYMBOL(configfs_register_subsystem); EXPORT_SYMBOL(configfs_unregister_subsystem); |
| 176 283 282 284 181 298 32 108 528 347 379 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ struct xlog; struct xfs_inode; struct xfs_mru_cache; struct xfs_ail; struct xfs_quotainfo; struct xfs_da_geometry; struct xfs_perag; /* dynamic preallocation free space thresholds, 5% down to 1% */ enum { XFS_LOWSP_1_PCNT = 0, XFS_LOWSP_2_PCNT, XFS_LOWSP_3_PCNT, XFS_LOWSP_4_PCNT, XFS_LOWSP_5_PCNT, XFS_LOWSP_MAX, }; /* * Error Configuration * * Error classes define the subsystem the configuration belongs to. * Error numbers define the errors that are configurable. */ enum { XFS_ERR_METADATA, XFS_ERR_CLASS_MAX, }; enum { XFS_ERR_DEFAULT, XFS_ERR_EIO, XFS_ERR_ENOSPC, XFS_ERR_ENODEV, XFS_ERR_ERRNO_MAX, }; #define XFS_ERR_RETRY_FOREVER -1 /* * Although retry_timeout is in jiffies which is normally an unsigned long, * we limit the retry timeout to 86400 seconds, or one day. So even a * signed 32-bit long is sufficient for a HZ value up to 24855. Making it * signed lets us store the special "-1" value, meaning retry forever. */ struct xfs_error_cfg { struct xfs_kobj kobj; int max_retries; long retry_timeout; /* in jiffies, -1 = infinite */ }; /* * Per-cpu deferred inode inactivation GC lists. */ struct xfs_inodegc { struct xfs_mount *mp; struct llist_head list; struct delayed_work work; int error; /* approximate count of inodes in the list */ unsigned int items; unsigned int shrinker_hits; unsigned int cpu; }; /* * Container for each type of groups, used to look up individual groups and * describes the geometry. */ struct xfs_groups { struct xarray xa; /* * Maximum capacity of the group in FSBs. * * Each group is laid out densely in the daddr space. For the * degenerate case of a pre-rtgroups filesystem, the incore rtgroup * pretends to have a zero-block and zero-blklog rtgroup. */ uint32_t blocks; /* * Log(2) of the logical size of each group. * * Compared to the blocks field above this is rounded up to the next * power of two, and thus lays out the xfs_fsblock_t/xfs_rtblock_t * space sparsely with a hole from blocks to (1 << blklog) at the end * of each group. */ uint8_t blklog; /* * Zoned devices can have gaps beyond the usable capacity of a zone and * the end in the LBA/daddr address space. In other words, the hardware * equivalent to the RT groups already takes care of the power of 2 * alignment for us. In this case the sparse FSB/RTB address space maps * 1:1 to the device address space. */ bool has_daddr_gaps; /* * Mask to extract the group-relative block number from a FSB. * For a pre-rtgroups filesystem we pretend to have one very large * rtgroup, so this mask must be 64-bit. */ uint64_t blkmask; /* * Start of the first group in the device. This is used to support a * RT device following the data device on the same block device for * SMR hard drives. */ xfs_fsblock_t start_fsb; /* * Maximum length of an atomic write for files stored in this * collection of allocation groups, in fsblocks. */ xfs_extlen_t awu_max; }; struct xfs_freecounter { /* free blocks for general use: */ struct percpu_counter count; /* total reserved blocks: */ uint64_t res_total; /* available reserved blocks: */ uint64_t res_avail; /* reserved blks @ remount,ro: */ uint64_t res_saved; }; /* * The struct xfsmount layout is optimised to separate read-mostly variables * from variables that are frequently modified. We put the read-mostly variables * first, then place all the other variables at the end. * * Typically, read-mostly variables are those that are set at mount time and * never changed again, or only change rarely as a result of things like sysfs * knobs being tweaked. */ typedef struct xfs_mount { struct xfs_sb m_sb; /* copy of fs superblock */ struct super_block *m_super; struct xfs_ail *m_ail; /* fs active log item list */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ struct xfs_buf *m_rtsb_bp; /* realtime superblock */ char *m_rtname; /* realtime device name */ char *m_logname; /* external log device name */ struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ struct xfs_inode *m_rootip; /* pointer to root directory */ struct xfs_inode *m_metadirip; /* ptr to metadata directory */ struct xfs_inode *m_rtdirip; /* ptr to realtime metadir */ struct xfs_quotainfo *m_quotainfo; /* disk quota information */ struct xfs_buftarg *m_ddev_targp; /* data device */ struct xfs_buftarg *m_logdev_targp;/* log device */ struct xfs_buftarg *m_rtdev_targp; /* rt device */ void __percpu *m_inodegc; /* percpu inodegc structures */ struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_reclaim_workqueue; struct workqueue_struct *m_sync_workqueue; struct workqueue_struct *m_blockgc_wq; struct workqueue_struct *m_inodegc_wq; int m_bsize; /* fs logical block size */ uint8_t m_blkbit_log; /* blocklog + NBBY */ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ int8_t m_rtxblklog; /* log2 of rextsize, if possible */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ /* number of rt extents per rt bitmap block if rtgroups enabled */ unsigned int m_rtx_per_rbmblock; uint m_alloc_mxr[2]; /* max alloc btree records */ uint m_alloc_mnr[2]; /* min alloc btree records */ uint m_bmap_dmxr[2]; /* max bmap btree records */ uint m_bmap_dmnr[2]; /* min bmap btree records */ uint m_rmap_mxr[2]; /* max rmap btree records */ uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_rtrmap_mxr[2]; /* max rtrmap btree records */ uint m_rtrmap_mnr[2]; /* min rtrmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ uint m_rtrefc_mxr[2]; /* max rtrefc btree records */ uint m_rtrefc_mnr[2]; /* min rtrefc btree records */ uint m_alloc_maxlevels; /* max alloc btree levels */ uint m_bm_maxlevels[2]; /* max bmap btree levels */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_rtrmap_maxlevels; /* max rtrmap btree level */ uint m_refc_maxlevels; /* max refcount btree level */ uint m_rtrefc_maxlevels; /* max rtrefc btree level */ unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ unsigned int m_rtbtree_maxlevels; /* max level of all rt btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ uint m_allocsize_log;/* min write size log bytes */ uint m_allocsize_blocks; /* min write size blocks */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ unsigned int m_rsumlevels; /* rt summary levels */ xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_qflags; /* quota status flags */ uint64_t m_features; /* active filesystem features */ uint64_t m_low_space[XFS_LOWSP_MAX]; uint64_t m_low_rtexts[XFS_LOWSP_MAX]; uint64_t m_rtxblkmask; /* rt extent block mask */ struct xfs_ino_geometry m_ino_geo; /* inode geometry */ struct xfs_trans_resv m_resv; /* precomputed res values */ /* low free space thresholds */ unsigned long m_opstate; /* dynamic state flags */ bool m_always_cow; bool m_fail_unmount; bool m_finobt_nores; /* no per-AG finobt resv. */ bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; unsigned int m_zonegc_low_space; /* max_atomic_write mount option value */ unsigned long long m_awu_max_bytes; /* * Bitsets of per-fs metadata that have been checked and/or are sick. * Callers must hold m_sb_lock to access these two fields. */ uint8_t m_fs_checked; uint8_t m_fs_sick; /* * Bitsets of rt metadata that have been checked and/or are sick. * Callers must hold m_sb_lock to access this field. */ uint8_t m_rt_checked; uint8_t m_rt_sick; /* * End of read-mostly variables. Frequently written variables and locks * should be placed below this comment from now on. The first variable * here is marked as cacheline aligned so they it is separated from * the read-mostly variables. */ spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */ struct percpu_counter m_icount; /* allocated inodes counter */ struct percpu_counter m_ifree; /* free inodes counter */ struct xfs_freecounter m_free[XC_FREE_NR]; /* * Count of data device blocks reserved for delayed allocations, * including indlen blocks. Does not include allocated CoW staging * extents or anything related to the rt device. */ struct percpu_counter m_delalloc_blks; /* * RT version of the above. */ struct percpu_counter m_delalloc_rtextents; /* * Global count of allocation btree blocks in use across all AGs. Only * used when perag reservation is enabled. Helps prevent block * reservation from attempting to reserve allocation btree blocks. */ atomic64_t m_allocbt_blks; struct xfs_groups m_groups[XG_TYPE_MAX]; struct delayed_work m_reclaim_work; /* background inode reclaim */ struct xfs_zone_info *m_zone_info; /* zone allocator information */ struct dentry *m_debugfs; /* debugfs parent */ struct xfs_kobj m_kobj; struct xfs_kobj m_error_kobj; struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ #ifdef CONFIG_XFS_ONLINE_SCRUB_STATS struct xchk_stats *m_scrub_stats; #endif struct xfs_kobj m_zoned_kobj; xfs_agnumber_t m_agfrotor; /* last ag where space found */ atomic_t m_agirotor; /* last ag dir inode alloced */ atomic_t m_rtgrotor; /* last rtgroup rtpicked */ struct mutex m_metafile_resv_lock; uint64_t m_metafile_resv_target; uint64_t m_metafile_resv_used; uint64_t m_metafile_resv_avail; /* Memory shrinker to throttle and reprioritize inodegc */ struct shrinker *m_inodegc_shrinker; /* * Workqueue item so that we can coalesce multiple inode flush attempts * into a single flush. */ struct work_struct m_flush_inodes_work; /* * Generation of the filesysyem layout. This is incremented by each * growfs, and used by the pNFS server to ensure the client updates * its view of the block device once it gets a layout that might * reference the newly added blocks. Does not need to be persistent * as long as we only allow file system size increments, but if we * ever support shrinks it would have to be persisted in addition * to various other kinds of pain inflicted on the pNFS server. */ uint32_t m_generation; struct mutex m_growlock; /* growfs mutex */ #ifdef DEBUG /* * Frequency with which errors are injected. Replaces xfs_etest; the * value stored in here is the inverse of the frequency with which the * error triggers. 1 = always, 2 = half the time, etc. */ unsigned int *m_errortag; struct xfs_kobj m_errortag_kobj; #endif /* cpus that have inodes queued for inactivation */ struct cpumask m_inodegc_cpumask; /* Hook to feed dirent updates to an active online repair. */ struct xfs_hooks m_dir_update_hooks; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) /* * Flags for m_features. * * These are all the active features in the filesystem, regardless of how * they are configured. */ #define XFS_FEAT_ATTR (1ULL << 0) /* xattrs present in fs */ #define XFS_FEAT_NLINK (1ULL << 1) /* 32 bit link counts */ #define XFS_FEAT_QUOTA (1ULL << 2) /* quota active */ #define XFS_FEAT_ALIGN (1ULL << 3) /* inode alignment */ #define XFS_FEAT_DALIGN (1ULL << 4) /* data alignment */ #define XFS_FEAT_LOGV2 (1ULL << 5) /* version 2 logs */ #define XFS_FEAT_SECTOR (1ULL << 6) /* sector size > 512 bytes */ #define XFS_FEAT_EXTFLG (1ULL << 7) /* unwritten extents */ #define XFS_FEAT_ASCIICI (1ULL << 8) /* ASCII only case-insens. */ #define XFS_FEAT_LAZYSBCOUNT (1ULL << 9) /* Superblk counters */ #define XFS_FEAT_PARENT (1ULL << 11) /* parent pointers */ #define XFS_FEAT_PROJID32 (1ULL << 12) /* 32 bit project id */ #define XFS_FEAT_CRC (1ULL << 13) /* metadata CRCs */ #define XFS_FEAT_V3INODES (1ULL << 14) /* Version 3 inodes */ #define XFS_FEAT_PQUOTINO (1ULL << 15) /* non-shared proj/grp quotas */ #define XFS_FEAT_FTYPE (1ULL << 16) /* inode type in dir */ #define XFS_FEAT_FINOBT (1ULL << 17) /* free inode btree */ #define XFS_FEAT_RMAPBT (1ULL << 18) /* reverse map btree */ #define XFS_FEAT_REFLINK (1ULL << 19) /* reflinked files */ #define XFS_FEAT_SPINODES (1ULL << 20) /* sparse inode chunks */ #define XFS_FEAT_META_UUID (1ULL << 21) /* metadata UUID */ #define XFS_FEAT_REALTIME (1ULL << 22) /* realtime device present */ #define XFS_FEAT_INOBTCNT (1ULL << 23) /* inobt block counts */ #define XFS_FEAT_BIGTIME (1ULL << 24) /* large timestamps */ #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ #define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ #define XFS_FEAT_ZONED (1ULL << 29) /* zoned RT device */ /* Mount features */ #define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */ #define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */ #define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */ #define XFS_FEAT_LARGE_IOSIZE (1ULL << 51) /* report large preferred * I/O size in stat() */ #define XFS_FEAT_WSYNC (1ULL << 52) /* synchronous metadata ops */ #define XFS_FEAT_DIRSYNC (1ULL << 53) /* synchronous directory ops */ #define XFS_FEAT_DISCARD (1ULL << 54) /* discard unused blocks */ #define XFS_FEAT_GRPID (1ULL << 55) /* group-ID assigned from directory */ #define XFS_FEAT_SMALL_INUMS (1ULL << 56) /* user wants 32bit inodes */ #define XFS_FEAT_SWALLOC (1ULL << 58) /* stripe width allocation */ #define XFS_FEAT_FILESTREAMS (1ULL << 59) /* use filestreams allocator */ #define XFS_FEAT_DAX_ALWAYS (1ULL << 60) /* DAX always enabled */ #define XFS_FEAT_DAX_NEVER (1ULL << 61) /* DAX never enabled */ #define XFS_FEAT_NORECOVERY (1ULL << 62) /* no recovery - dirty fs */ #define XFS_FEAT_NOUUID (1ULL << 63) /* ignore uuid during mount */ #define __XFS_HAS_FEAT(name, NAME) \ static inline bool xfs_has_ ## name (const struct xfs_mount *mp) \ { \ return mp->m_features & XFS_FEAT_ ## NAME; \ } /* Some features can be added dynamically so they need a set wrapper, too. */ #define __XFS_ADD_FEAT(name, NAME) \ __XFS_HAS_FEAT(name, NAME); \ static inline void xfs_add_ ## name (struct xfs_mount *mp) \ { \ mp->m_features |= XFS_FEAT_ ## NAME; \ xfs_sb_version_add ## name(&mp->m_sb); \ } /* Superblock features */ __XFS_ADD_FEAT(attr, ATTR) __XFS_HAS_FEAT(nlink, NLINK) __XFS_ADD_FEAT(quota, QUOTA) __XFS_HAS_FEAT(dalign, DALIGN) __XFS_HAS_FEAT(sector, SECTOR) __XFS_HAS_FEAT(asciici, ASCIICI) __XFS_HAS_FEAT(parent, PARENT) __XFS_HAS_FEAT(ftype, FTYPE) __XFS_HAS_FEAT(finobt, FINOBT) __XFS_HAS_FEAT(rmapbt, RMAPBT) __XFS_HAS_FEAT(reflink, REFLINK) __XFS_HAS_FEAT(sparseinodes, SPINODES) __XFS_HAS_FEAT(metauuid, META_UUID) __XFS_HAS_FEAT(realtime, REALTIME) __XFS_HAS_FEAT(inobtcounts, INOBTCNT) __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) __XFS_HAS_FEAT(metadir, METADIR) __XFS_HAS_FEAT(zoned, ZONED) __XFS_HAS_FEAT(nolifetime, NOLIFETIME) static inline bool xfs_has_rtgroups(const struct xfs_mount *mp) { /* all metadir file systems also allow rtgroups */ return xfs_has_metadir(mp); } static inline bool xfs_has_rtsb(const struct xfs_mount *mp) { /* all rtgroups filesystems with an rt section have an rtsb */ return xfs_has_rtgroups(mp) && xfs_has_realtime(mp) && !xfs_has_zoned(mp); } static inline bool xfs_has_rtrmapbt(const struct xfs_mount *mp) { return xfs_has_rtgroups(mp) && xfs_has_realtime(mp) && xfs_has_rmapbt(mp); } static inline bool xfs_has_rtreflink(const struct xfs_mount *mp) { return xfs_has_metadir(mp) && xfs_has_realtime(mp) && xfs_has_reflink(mp); } static inline bool xfs_has_nonzoned(const struct xfs_mount *mp) { return !xfs_has_zoned(mp); } static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp) { return xfs_has_reflink(mp); } /* * Some features are always on for v5 file systems, allow the compiler to * eliminiate dead code when building without v4 support. */ #define __XFS_HAS_V4_FEAT(name, NAME) \ static inline bool xfs_has_ ## name (struct xfs_mount *mp) \ { \ return !IS_ENABLED(CONFIG_XFS_SUPPORT_V4) || \ (mp->m_features & XFS_FEAT_ ## NAME); \ } #define __XFS_ADD_V4_FEAT(name, NAME) \ __XFS_HAS_V4_FEAT(name, NAME); \ static inline void xfs_add_ ## name (struct xfs_mount *mp) \ { \ if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) { \ mp->m_features |= XFS_FEAT_ ## NAME; \ xfs_sb_version_add ## name(&mp->m_sb); \ } \ } __XFS_HAS_V4_FEAT(align, ALIGN) __XFS_HAS_V4_FEAT(logv2, LOGV2) __XFS_HAS_V4_FEAT(extflg, EXTFLG) __XFS_HAS_V4_FEAT(lazysbcount, LAZYSBCOUNT) __XFS_ADD_V4_FEAT(projid32, PROJID32) __XFS_HAS_V4_FEAT(v3inodes, V3INODES) __XFS_HAS_V4_FEAT(crc, CRC) __XFS_HAS_V4_FEAT(pquotino, PQUOTINO) static inline void xfs_add_attr2(struct xfs_mount *mp) { if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) xfs_sb_version_addattr2(&mp->m_sb); } /* * Mount features * * These do not change dynamically - features that can come and go, such as 32 * bit inodes and read-only state, are kept as operational state rather than * features. */ __XFS_HAS_FEAT(noalign, NOALIGN) __XFS_HAS_FEAT(allocsize, ALLOCSIZE) __XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE) __XFS_HAS_FEAT(wsync, WSYNC) __XFS_HAS_FEAT(dirsync, DIRSYNC) __XFS_HAS_FEAT(discard, DISCARD) __XFS_HAS_FEAT(grpid, GRPID) __XFS_HAS_FEAT(small_inums, SMALL_INUMS) __XFS_HAS_FEAT(swalloc, SWALLOC) __XFS_HAS_FEAT(filestreams, FILESTREAMS) __XFS_HAS_FEAT(dax_always, DAX_ALWAYS) __XFS_HAS_FEAT(dax_never, DAX_NEVER) __XFS_HAS_FEAT(norecovery, NORECOVERY) __XFS_HAS_FEAT(nouuid, NOUUID) /* * Operational mount state flags * * Use these with atomic bit ops only! */ #define XFS_OPSTATE_UNMOUNTING 0 /* filesystem is unmounting */ #define XFS_OPSTATE_CLEAN 1 /* mount was clean */ #define XFS_OPSTATE_SHUTDOWN 2 /* stop all fs operations */ #define XFS_OPSTATE_INODE32 3 /* inode32 allocator active */ #define XFS_OPSTATE_READONLY 4 /* read-only fs */ /* * If set, inactivation worker threads will be scheduled to process queued * inodegc work. If not, queued inodes remain in memory waiting to be * processed. */ #define XFS_OPSTATE_INODEGC_ENABLED 5 /* * If set, background speculative prealloc gc worker threads will be scheduled * to process queued blockgc work. If not, inodes retain their preallocations * until explicitly deleted. */ #define XFS_OPSTATE_BLOCKGC_ENABLED 6 /* Kernel has logged a warning about shrink being used on this fs. */ #define XFS_OPSTATE_WARNED_SHRINK 9 /* Kernel has logged a warning about logged xattr updates being used. */ #define XFS_OPSTATE_WARNED_LARP 10 /* Mount time quotacheck is running */ #define XFS_OPSTATE_QUOTACHECK_RUNNING 11 /* Do we want to clear log incompat flags? */ #define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12 /* Filesystem can use logged extended attributes */ #define XFS_OPSTATE_USE_LARP 13 /* Kernel has logged a warning about blocksize > pagesize on this fs. */ #define XFS_OPSTATE_WARNED_LBS 14 /* Kernel has logged a warning about metadata dirs being used on this fs. */ #define XFS_OPSTATE_WARNED_METADIR 17 /* Filesystem should use qflags to determine quotaon status */ #define XFS_OPSTATE_RESUMING_QUOTAON 18 /* Kernel has logged a warning about zoned RT device being used on this fs. */ #define XFS_OPSTATE_WARNED_ZONED 19 /* (Zoned) GC is in progress */ #define XFS_OPSTATE_ZONEGC_RUNNING 20 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ { \ return test_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } \ static inline bool xfs_clear_ ## name (struct xfs_mount *mp) \ { \ return test_and_clear_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } \ static inline bool xfs_set_ ## name (struct xfs_mount *mp) \ { \ return test_and_set_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } __XFS_IS_OPSTATE(unmounting, UNMOUNTING) __XFS_IS_OPSTATE(clean, CLEAN) __XFS_IS_OPSTATE(shutdown, SHUTDOWN) __XFS_IS_OPSTATE(inode32, INODE32) __XFS_IS_OPSTATE(readonly, READONLY) __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) #ifdef CONFIG_XFS_QUOTA __XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING) __XFS_IS_OPSTATE(resuming_quotaon, RESUMING_QUOTAON) #else static inline bool xfs_is_quotacheck_running(struct xfs_mount *mp) { return false; } static inline bool xfs_is_resuming_quotaon(struct xfs_mount *mp) { return false; } static inline void xfs_set_resuming_quotaon(struct xfs_mount *m) { } static inline bool xfs_clear_resuming_quotaon(struct xfs_mount *mp) { return false; } #endif /* CONFIG_XFS_QUOTA */ __XFS_IS_OPSTATE(done_with_log_incompat, UNSET_LOG_INCOMPAT) __XFS_IS_OPSTATE(using_logged_xattrs, USE_LARP) __XFS_IS_OPSTATE(zonegc_running, ZONEGC_RUNNING) static inline bool xfs_should_warn(struct xfs_mount *mp, long nr) { return !test_and_set_bit(nr, &mp->m_opstate); } #define XFS_OPSTATE_STRINGS \ { (1UL << XFS_OPSTATE_UNMOUNTING), "unmounting" }, \ { (1UL << XFS_OPSTATE_CLEAN), "clean" }, \ { (1UL << XFS_OPSTATE_SHUTDOWN), "shutdown" }, \ { (1UL << XFS_OPSTATE_INODE32), "inode32" }, \ { (1UL << XFS_OPSTATE_READONLY), "read_only" }, \ { (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \ { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \ { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \ { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \ { (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" }, \ { (1UL << XFS_OPSTATE_UNSET_LOG_INCOMPAT), "unset_log_incompat" }, \ { (1UL << XFS_OPSTATE_USE_LARP), "logged_xattrs" } /* * Max and min values for mount-option defined I/O * preallocation sizes. */ #define XFS_MAX_IO_LOG 30 /* 1G */ #define XFS_MIN_IO_LOG PAGE_SHIFT void xfs_do_force_shutdown(struct xfs_mount *mp, uint32_t flags, char *fname, int lnnum); #define xfs_force_shutdown(m,f) \ xfs_do_force_shutdown(m, f, __FILE__, __LINE__) #define SHUTDOWN_META_IO_ERROR (1u << 0) /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR (1u << 1) /* write attempt to the log failed */ #define SHUTDOWN_FORCE_UMOUNT (1u << 2) /* shutdown from a forced unmount */ #define SHUTDOWN_CORRUPT_INCORE (1u << 3) /* corrupt in-memory structures */ #define SHUTDOWN_CORRUPT_ONDISK (1u << 4) /* corrupt metadata on device */ #define SHUTDOWN_DEVICE_REMOVED (1u << 5) /* device removed underneath us */ #define XFS_SHUTDOWN_STRINGS \ { SHUTDOWN_META_IO_ERROR, "metadata_io" }, \ { SHUTDOWN_LOG_IO_ERROR, "log_io" }, \ { SHUTDOWN_FORCE_UMOUNT, "force_umount" }, \ { SHUTDOWN_CORRUPT_INCORE, "corruption" }, \ { SHUTDOWN_DEVICE_REMOVED, "device_removed" } /* * Flags for xfs_mountfs */ #define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ static inline xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) { xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); do_div(ld, mp->m_sb.sb_agblocks); return (xfs_agnumber_t) ld; } static inline xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) { xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); } extern void xfs_uuid_table_free(void); uint64_t xfs_default_resblks(struct xfs_mount *mp, enum xfs_free_counter ctr); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); /* * Deltas for the block count can vary from 1 to very large, but lock contention * only occurs on frequent small block count updates such as in the delayed * allocation path for buffered writes (page a time updates). Hence we set * a large batch count (1024) to minimise global counter updates except when * we get near to ENOSPC and we have to be very accurate with our updates. */ #define XFS_FDBLOCKS_BATCH 1024 uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp, enum xfs_free_counter ctr); /* * Sum up the freecount, but never return negative values. */ static inline s64 xfs_sum_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_sum_positive(&mp->m_free[ctr].count); } /* * Same as above, but does return negative values. Mostly useful for * special cases like repair and tracing. */ static inline s64 xfs_sum_freecounter_raw(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_sum(&mp->m_free[ctr].count); } /* * This just provides and estimate without the cpu-local updates, use * xfs_sum_freecounter for the exact value. */ static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_read_positive(&mp->m_free[ctr].count); } static inline int xfs_compare_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, s64 rhs, s32 batch) { return __percpu_counter_compare(&mp->m_free[ctr].count, rhs, batch); } static inline void xfs_set_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t val) { percpu_counter_set(&mp->m_free[ctr].count, val); } int xfs_dec_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t delta, bool rsvd); void xfs_add_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t delta); static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta, bool reserved) { return xfs_dec_freecounter(mp, XC_FREE_BLOCKS, delta, reserved); } static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta) { xfs_add_freecounter(mp, XC_FREE_BLOCKS, delta); } static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta) { return xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, delta, false); } static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta) { xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, delta); } extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, xfs_off_t count_fsb); struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); void xfs_force_summary_recalc(struct xfs_mount *mp); int xfs_add_incompat_log_feature(struct xfs_mount *mp, uint32_t feature); bool xfs_clear_incompat_log_features(struct xfs_mount *mp); void xfs_mod_delalloc(struct xfs_inode *ip, int64_t data_delta, int64_t ind_delta); static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta) { percpu_counter_add(&mp->m_delalloc_blks, delta); } int xfs_set_max_atomic_write_opt(struct xfs_mount *mp, unsigned long long new_max_bytes); static inline struct xfs_buftarg * xfs_group_type_buftarg( struct xfs_mount *mp, enum xfs_group_type type) { switch (type) { case XG_TYPE_AG: return mp->m_ddev_targp; case XG_TYPE_RTG: return mp->m_rtdev_targp; default: ASSERT(0); break; } return NULL; } #endif /* __XFS_MOUNT_H__ */ |
| 60 60 121 121 221 221 64 64 64 274 275 1 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 | // SPDX-License-Identifier: GPL-2.0 /* * dax: direct host memory access * Copyright (C) 2020 Red Hat, Inc. */ #include "fuse_i.h" #include <linux/delay.h> #include <linux/dax.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/iomap.h> #include <linux/interval_tree.h> /* * Default memory range size. A power of 2 so it agrees with common FUSE_INIT * map_alignment values 4KB and 64KB. */ #define FUSE_DAX_SHIFT 21 #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) /* Number of ranges reclaimer will try to free in one invocation */ #define FUSE_DAX_RECLAIM_CHUNK (10) /* * Dax memory reclaim threshold in percetage of total ranges. When free * number of free ranges drops below this threshold, reclaim can trigger * Default is 20% */ #define FUSE_DAX_RECLAIM_THRESHOLD (20) /** Translation information for file offsets to DAX window offsets */ struct fuse_dax_mapping { /* Pointer to inode where this memory range is mapped */ struct inode *inode; /* Will connect in fcd->free_ranges to keep track of free memory */ struct list_head list; /* For interval tree in file/inode */ struct interval_tree_node itn; /* Will connect in fc->busy_ranges to keep track busy memory */ struct list_head busy_list; /** Position in DAX window */ u64 window_offset; /** Length of mapping, in bytes */ loff_t length; /* Is this mapping read-only or read-write */ bool writable; /* reference count when the mapping is used by dax iomap. */ refcount_t refcnt; }; /* Per-inode dax map */ struct fuse_inode_dax { /* Semaphore to protect modifications to the dmap tree */ struct rw_semaphore sem; /* Sorted rb tree of struct fuse_dax_mapping elements */ struct rb_root_cached tree; unsigned long nr; }; struct fuse_conn_dax { /* DAX device */ struct dax_device *dev; /* Lock protecting accessess to members of this structure */ spinlock_t lock; /* List of memory ranges which are busy */ unsigned long nr_busy_ranges; struct list_head busy_ranges; /* Worker to free up memory ranges */ struct delayed_work free_work; /* Wait queue for a dax range to become free */ wait_queue_head_t range_waitq; /* DAX Window Free Ranges */ long nr_free_ranges; struct list_head free_ranges; unsigned long nr_ranges; }; static inline struct fuse_dax_mapping * node_to_dmap(struct interval_tree_node *node) { if (!node) return NULL; return container_of(node, struct fuse_dax_mapping, itn); } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); static void __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { unsigned long free_threshold; /* If number of free ranges are below threshold, start reclaim */ free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1); if (fcd->nr_free_ranges < free_threshold) queue_delayed_work(system_long_wq, &fcd->free_work, msecs_to_jiffies(delay_ms)); } static void kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { spin_lock(&fcd->lock); __kick_dmap_free_worker(fcd, delay_ms); spin_unlock(&fcd->lock); } static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) { struct fuse_dax_mapping *dmap; spin_lock(&fcd->lock); dmap = list_first_entry_or_null(&fcd->free_ranges, struct fuse_dax_mapping, list); if (dmap) { list_del_init(&dmap->list); WARN_ON(fcd->nr_free_ranges <= 0); fcd->nr_free_ranges--; } __kick_dmap_free_worker(fcd, 0); spin_unlock(&fcd->lock); return dmap; } /* This assumes fcd->lock is held */ static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_del_init(&dmap->busy_list); WARN_ON(fcd->nr_busy_ranges == 0); fcd->nr_busy_ranges--; } static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { spin_lock(&fcd->lock); __dmap_remove_busy_list(fcd, dmap); spin_unlock(&fcd->lock); } /* This assumes fcd->lock is held */ static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_add_tail(&dmap->list, &fcd->free_ranges); fcd->nr_free_ranges++; wake_up(&fcd->range_waitq); } static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { /* Return fuse_dax_mapping to free list */ spin_lock(&fcd->lock); __dmap_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); } static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, struct fuse_dax_mapping *dmap, bool writable, bool upgrade) { struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn_dax *fcd = fm->fc->dax; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_setupmapping_in inarg; loff_t offset = start_idx << FUSE_DAX_SHIFT; FUSE_ARGS(args); ssize_t err; WARN_ON(fcd->nr_free_ranges < 0); /* Ask fuse daemon to setup mapping */ memset(&inarg, 0, sizeof(inarg)); inarg.foffset = offset; inarg.fh = -1; inarg.moffset = dmap->window_offset; inarg.len = FUSE_DAX_SZ; inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; if (writable) inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; args.opcode = FUSE_SETUPMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; err = fuse_simple_request(fm, &args); if (err < 0) return err; dmap->writable = writable; if (!upgrade) { /* * We don't take a reference on inode. inode is valid right now * and when inode is going away, cleanup logic should first * cleanup dmap entries. */ dmap->inode = inode; dmap->itn.start = dmap->itn.last = start_idx; /* Protected by fi->dax->sem */ interval_tree_insert(&dmap->itn, &fi->dax->tree); fi->dax->nr++; spin_lock(&fcd->lock); list_add_tail(&dmap->busy_list, &fcd->busy_ranges); fcd->nr_busy_ranges++; spin_unlock(&fcd->lock); } return 0; } static int fuse_send_removemapping(struct inode *inode, struct fuse_removemapping_in *inargp, struct fuse_removemapping_one *remove_one) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); args.opcode = FUSE_REMOVEMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 3; fuse_set_zero_arg0(&args); args.in_args[1].size = sizeof(*inargp); args.in_args[1].value = inargp; args.in_args[2].size = inargp->count * sizeof(*remove_one); args.in_args[2].value = remove_one; return fuse_simple_request(fm, &args); } static int dmap_removemapping_list(struct inode *inode, unsigned int num, struct list_head *to_remove) { struct fuse_removemapping_one *remove_one, *ptr; struct fuse_removemapping_in inarg; struct fuse_dax_mapping *dmap; int ret, i = 0, nr_alloc; nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); if (!remove_one) return -ENOMEM; ptr = remove_one; list_for_each_entry(dmap, to_remove, list) { ptr->moffset = dmap->window_offset; ptr->len = dmap->length; ptr++; i++; num--; if (i >= nr_alloc || num == 0) { memset(&inarg, 0, sizeof(inarg)); inarg.count = i; ret = fuse_send_removemapping(inode, &inarg, remove_one); if (ret) goto out; ptr = remove_one; i = 0; } } out: kfree(remove_one); return ret; } /* * Cleanup dmap entry and add back to free list. This should be called with * fcd->lock held. */ static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", dmap->itn.start, dmap->itn.last, dmap->window_offset, dmap->length); __dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; __dmap_add_to_free_pool(fcd, dmap); } /* * Free inode dmap entries whose range falls inside [start, end]. * Does not take any locks. At this point of time it should only be * called from evict_inode() path where we know all dmap entries can be * reclaimed. */ static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, struct inode *inode, loff_t start, loff_t end) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap, *n; int err, num = 0; LIST_HEAD(to_remove); unsigned long start_idx = start >> FUSE_DAX_SHIFT; unsigned long end_idx = end >> FUSE_DAX_SHIFT; struct interval_tree_node *node; while (1) { node = interval_tree_iter_first(&fi->dax->tree, start_idx, end_idx); if (!node) break; dmap = node_to_dmap(node); /* inode is going away. There should not be any users of dmap */ WARN_ON(refcount_read(&dmap->refcnt) > 1); interval_tree_remove(&dmap->itn, &fi->dax->tree); num++; list_add(&dmap->list, &to_remove); } /* Nothing to remove */ if (list_empty(&to_remove)) return; WARN_ON(fi->dax->nr < num); fi->dax->nr -= num; err = dmap_removemapping_list(inode, num, &to_remove); if (err && err != -ENOTCONN) { pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", start, end); } spin_lock(&fcd->lock); list_for_each_entry_safe(dmap, n, &to_remove, list) { list_del_init(&dmap->list); dmap_reinit_add_to_free_pool(fcd, dmap); } spin_unlock(&fcd->lock); } static int dmap_removemapping_one(struct inode *inode, struct fuse_dax_mapping *dmap) { struct fuse_removemapping_one forget_one; struct fuse_removemapping_in inarg; memset(&inarg, 0, sizeof(inarg)); inarg.count = 1; memset(&forget_one, 0, sizeof(forget_one)); forget_one.moffset = dmap->window_offset; forget_one.len = dmap->length; return fuse_send_removemapping(inode, &inarg, &forget_one); } /* * It is called from evict_inode() and by that time inode is going away. So * this function does not take any locks like fi->dax->sem for traversing * that fuse inode interval tree. If that lock is taken then lock validator * complains of deadlock situation w.r.t fs_reclaim lock. */ void fuse_dax_inode_cleanup(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); /* * fuse_evict_inode() has already called truncate_inode_pages_final() * before we arrive here. So we should not have to worry about any * pages/exception entries still associated with inode. */ inode_reclaim_dmap_range(fc->dax, inode, 0, -1); WARN_ON(fi->dax->nr); } static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) { iomap->addr = IOMAP_NULL_ADDR; iomap->length = length; iomap->type = IOMAP_HOLE; } static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, struct iomap *iomap, struct fuse_dax_mapping *dmap, unsigned int flags) { loff_t offset, len; loff_t i_size = i_size_read(inode); offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); len = min(length, dmap->length - offset); /* If length is beyond end of file, truncate further */ if (pos + len > i_size) len = i_size - pos; if (len > 0) { iomap->addr = dmap->window_offset + offset; iomap->length = len; if (flags & IOMAP_FAULT) iomap->length = ALIGN(len, PAGE_SIZE); iomap->type = IOMAP_MAPPED; /* * increace refcnt so that reclaim code knows this dmap is in * use. This assumes fi->dax->sem mutex is held either * shared/exclusive. */ refcount_inc(&dmap->refcnt); /* iomap->private should be NULL */ WARN_ON_ONCE(iomap->private); iomap->private = dmap; } else { /* Mapping beyond end of file is hole */ fuse_fill_iomap_hole(iomap, length); } } static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; int ret; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Can't do inline reclaim in fault path. We call * dax_layout_busy_page() before we free a range. And * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it. * In fault path we enter with mapping->invalidate_lock held and can't * drop it. Also in fault path we hold mapping->invalidate_lock shared * and not exclusive, so that creates further issues with * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault() * will wait for a memory range to become free and retry. */ if (flags & IOMAP_FAULT) { alloc_dmap = alloc_dax_mapping(fcd); if (!alloc_dmap) return -EAGAIN; } else { alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); if (IS_ERR(alloc_dmap)) return PTR_ERR(alloc_dmap); } /* If we are here, we should have memory allocated */ if (WARN_ON(!alloc_dmap)) return -EIO; /* * Take write lock so that only one caller can try to setup mapping * and other waits. */ down_write(&fi->dax->sem); /* * We dropped lock. Check again if somebody else setup * mapping already. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return 0; } /* Setup one mapping */ ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, writable, false); if (ret < 0) { dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return ret; } fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); up_write(&fi->dax->sem); return 0; } static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; int ret; unsigned long idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Take exclusive lock so that only one caller can try to setup * mapping and others wait. */ down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, idx, idx); /* We are holding either inode lock or invalidate_lock, and that should * ensure that dmap can't be truncated. We are holding a reference * on dmap and that should make sure it can't be reclaimed. So dmap * should still be there in tree despite the fact we dropped and * re-acquired the fi->dax->sem lock. */ ret = -EIO; if (WARN_ON(!node)) goto out_err; dmap = node_to_dmap(node); /* We took an extra reference on dmap to make sure its not reclaimd. * Now we hold fi->dax->sem lock and that reference is not needed * anymore. Drop it. */ if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } /* Maybe another thread already upgraded mapping while we were not * holding lock. */ if (dmap->writable) { ret = 0; goto out_fill_iomap; } ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, true); if (ret < 0) goto out_err; out_fill_iomap: fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); out_err: up_write(&fi->dax->sem); return ret; } /* This is just for DAX and the mapping is ephemeral, do not use it for other * purposes since there is no block device with a permanent mapping. */ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* We don't support FIEMAP */ if (WARN_ON(flags & IOMAP_REPORT)) return -EIO; iomap->offset = pos; iomap->flags = 0; iomap->bdev = NULL; iomap->dax_dev = fc->dax->dev; /* * Both read/write and mmap path can race here. So we need something * to make sure if we are setting up mapping, then other path waits * * For now, use a semaphore for this. It probably needs to be * optimized later. */ down_read(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); if (writable && !dmap->writable) { /* Upgrade read-only mapping to read-write. This will * require exclusive fi->dax->sem lock as we don't want * two threads to be trying to this simultaneously * for same dmap. So drop shared lock and acquire * exclusive lock. * * Before dropping fi->dax->sem lock, take reference * on dmap so that its not freed by range reclaim. */ refcount_inc(&dmap->refcnt); up_read(&fi->dax->sem); pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); return fuse_upgrade_dax_mapping(inode, pos, length, flags, iomap); } else { fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); up_read(&fi->dax->sem); return 0; } } else { up_read(&fi->dax->sem); pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); if (pos >= i_size_read(inode)) goto iomap_hole; return fuse_setup_new_dax_mapping(inode, pos, length, flags, iomap); } /* * If read beyond end of file happens, fs code seems to return * it as hole */ iomap_hole: fuse_fill_iomap_hole(iomap, length); pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", __func__, pos, length, iomap->length); return 0; } static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { struct fuse_dax_mapping *dmap = iomap->private; if (dmap) { if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } } /* DAX writes beyond end-of-file aren't handled using iomap, so the * file size is unchanged and there is nothing to do here. */ return 0; } static const struct iomap_ops fuse_iomap_ops = { .iomap_begin = fuse_iomap_begin, .iomap_end = fuse_iomap_end, }; static void fuse_wait_dax_page(struct inode *inode) { filemap_invalidate_unlock(inode->i_mapping); schedule(); filemap_invalidate_lock(inode->i_mapping); } /* Should be called with mapping->invalidate_lock held exclusively. */ int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end) { return dax_break_layout(inode, dmap_start, dmap_end, fuse_wait_dax_page); } ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { inode_lock_shared(inode); } ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); inode_unlock_shared(inode); /* TODO file_accessed(iocb->f_filp) */ return ret; } static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); return (iov_iter_rw(from) == WRITE && ((iocb->ki_pos) >= i_size_read(inode) || (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); } static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) return -EAGAIN; } else { inode_lock(inode); } ret = generic_write_checks(iocb, from); if (ret <= 0) goto out; ret = file_remove_privs(iocb->ki_filp); if (ret) goto out; /* TODO file_update_time() but we don't want metadata I/O */ /* Do not use dax for file extending writes as write and on * disk i_size increase are not atomic otherwise. */ if (file_extending_write(iocb, from)) ret = fuse_dax_direct_write(iocb, from); else ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); out: inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order, bool write) { vm_fault_t ret; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; unsigned long pfn; int error = 0; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; bool retry = false; if (write) sb_start_pagefault(sb); retry: if (retry && !(fcd->nr_free_ranges > 0)) wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); /* * We need to serialize against not only truncate but also against * fuse dax memory range reclaim. While a range is being reclaimed, * we do not want any read/write/mmap to make progress and try * to populate page cache or access memory we are trying to free. */ filemap_invalidate_lock_shared(inode->i_mapping); ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops); if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { error = 0; retry = true; filemap_invalidate_unlock_shared(inode->i_mapping); goto retry; } if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, order, pfn); filemap_invalidate_unlock_shared(inode->i_mapping); if (write) sb_end_pagefault(sb); return ret; } static vm_fault_t fuse_dax_fault(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order) { return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static const struct vm_operations_struct fuse_dax_vm_ops = { .fault = fuse_dax_fault, .huge_fault = fuse_dax_huge_fault, .page_mkwrite = fuse_dax_page_mkwrite, .pfn_mkwrite = fuse_dax_pfn_mkwrite, }; int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &fuse_dax_vm_ops; vm_flags_set(vma, VM_MIXEDMAP | VM_HUGEPAGE); return 0; } static int dmap_writeback_invalidate(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); if (ret) { pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", ret, start_pos, end_pos); return ret; } ret = invalidate_inode_pages2_range(inode->i_mapping, start_pos >> PAGE_SHIFT, end_pos >> PAGE_SHIFT); if (ret) pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", ret); return ret; } static int reclaim_one_dmap_locked(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); /* * igrab() was done to make sure inode won't go under us, and this * further avoids the race with evict(). */ ret = dmap_writeback_invalidate(inode, dmap); if (ret) return ret; /* Remove dax mapping from inode interval tree now */ interval_tree_remove(&dmap->itn, &fi->dax->tree); fi->dax->nr--; /* It is possible that umount/shutdown has killed the fuse connection * and worker thread is trying to reclaim memory in parallel. Don't * warn in that case. */ ret = dmap_removemapping_one(inode, dmap); if (ret && ret != -ENOTCONN) { pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", dmap->window_offset, dmap->length, ret); } return 0; } /* Find first mapped dmap for an inode and return file offset. Caller needs * to hold fi->dax->sem lock either shared or exclusive. */ static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; node = interval_tree_iter_next(node, 0, -1)) { dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) continue; return dmap; } return NULL; } /* * Find first mapping in the tree and free it and return it. Do not add * it back to free pool. */ static struct fuse_dax_mapping * inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, bool *retry) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; u64 dmap_start, dmap_end; unsigned long start_idx; int ret; struct interval_tree_node *node; filemap_invalidate_lock(inode->i_mapping); /* Lookup a dmap and corresponding file offset to reclaim. */ down_read(&fi->dax->sem); dmap = inode_lookup_first_dmap(inode); if (dmap) { start_idx = dmap->itn.start; dmap_start = start_idx << FUSE_DAX_SHIFT; dmap_end = dmap_start + FUSE_DAX_SZ - 1; } up_read(&fi->dax->sem); if (!dmap) goto out_mmap_sem; /* * Make sure there are no references to inode pages using * get_user_pages() */ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", ret); dmap = ERR_PTR(ret); goto out_mmap_sem; } down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got reclaimed by somebody else */ if (!node) { if (retry) *retry = true; goto out_write_dmap_sem; } dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) { dmap = NULL; if (retry) *retry = true; goto out_write_dmap_sem; } ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) { dmap = ERR_PTR(ret); goto out_write_dmap_sem; } /* Clean up dmap. Do not add back to free list */ dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", __func__, inode, dmap->window_offset, dmap->length); out_write_dmap_sem: up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return dmap; } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) { struct fuse_dax_mapping *dmap; struct fuse_inode *fi = get_fuse_inode(inode); while (1) { bool retry = false; dmap = alloc_dax_mapping(fcd); if (dmap) return dmap; dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); /* * Either we got a mapping or it is an error, return in both * the cases. */ if (dmap) return dmap; /* If we could not reclaim a mapping because it * had a reference or some other temporary failure, * Try again. We want to give up inline reclaim only * if there is no range assigned to this node. Otherwise * if a deadlock is possible if we sleep with * mapping->invalidate_lock held and worker to free memory * can't make progress due to unavailability of * mapping->invalidate_lock. So sleep only if fi->dax->nr=0 */ if (retry) continue; /* * There are no mappings which can be reclaimed. Wait for one. * We are not holding fi->dax->sem. So it is possible * that range gets added now. But as we are not holding * mapping->invalidate_lock, worker should still be able to * free up a range and wake us up. */ if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { if (wait_event_killable_exclusive(fcd->range_waitq, (fcd->nr_free_ranges > 0))) { return ERR_PTR(-EINTR); } } } } static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; /* Find fuse dax mapping at file offset inode. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got cleaned up by somebody else */ if (!node) return 0; dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) return 0; ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) return ret; /* Cleanup dmap entry and add back to free list */ spin_lock(&fcd->lock); dmap_reinit_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); return ret; } /* * Free a range of memory. * Locking: * 1. Take mapping->invalidate_lock to block dax faults. * 2. Take fi->dax->sem to protect interval tree and also to make sure * read/write can not reuse a dmap which we might be freeing. */ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx, unsigned long end_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; filemap_invalidate_lock(inode->i_mapping); ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n", ret); goto out_mmap_sem; } down_write(&fi->dax->sem); ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return ret; } static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, unsigned long nr_to_free) { struct fuse_dax_mapping *dmap, *pos, *temp; int ret, nr_freed = 0; unsigned long start_idx = 0, end_idx = 0; struct inode *inode = NULL; /* Pick first busy range and free it for now*/ while (1) { if (nr_freed >= nr_to_free) break; dmap = NULL; spin_lock(&fcd->lock); if (!fcd->nr_busy_ranges) { spin_unlock(&fcd->lock); return 0; } list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, busy_list) { /* skip this range if it's in use. */ if (refcount_read(&pos->refcnt) > 1) continue; inode = igrab(pos->inode); /* * This inode is going away. That will free * up all the ranges anyway, continue to * next range. */ if (!inode) continue; /* * Take this element off list and add it tail. If * this element can't be freed, it will help with * selecting new element in next iteration of loop. */ dmap = pos; list_move_tail(&dmap->busy_list, &fcd->busy_ranges); start_idx = end_idx = dmap->itn.start; break; } spin_unlock(&fcd->lock); if (!dmap) return 0; ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); iput(inode); if (ret) return ret; nr_freed++; } return 0; } static void fuse_dax_free_mem_worker(struct work_struct *work) { int ret; struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, free_work.work); ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); if (ret) { pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", ret); } /* If number of free ranges are still below threshold, requeue */ kick_dmap_free_worker(fcd, 1); } static void fuse_free_dax_mem_ranges(struct list_head *mem_list) { struct fuse_dax_mapping *range, *temp; /* Free All allocated elements */ list_for_each_entry_safe(range, temp, mem_list, list) { list_del(&range->list); if (!list_empty(&range->busy_list)) list_del(&range->busy_list); kfree(range); } } void fuse_dax_conn_free(struct fuse_conn *fc) { if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); fc->dax = NULL; } } static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; unsigned long i; init_waitqueue_head(&fcd->range_waitq); INIT_LIST_HEAD(&fcd->free_ranges); INIT_LIST_HEAD(&fcd->busy_ranges); INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), DAX_ACCESS, NULL, NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); return nr_pages; } nr_ranges = nr_pages/FUSE_DAX_PAGES; pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", __func__, nr_pages, nr_ranges); for (i = 0; i < nr_ranges; i++) { range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); ret = -ENOMEM; if (!range) goto out_err; /* TODO: This offset only works if virtio-fs driver is not * having some memory hidden at the beginning. This needs * better handling */ range->window_offset = i * FUSE_DAX_SZ; range->length = FUSE_DAX_SZ; INIT_LIST_HEAD(&range->busy_list); refcount_set(&range->refcnt, 1); list_add_tail(&range->list, &fcd->free_ranges); } fcd->nr_free_ranges = nr_ranges; fcd->nr_ranges = nr_ranges; return 0; out_err: /* Free All allocated elements */ fuse_free_dax_mem_ranges(&fcd->free_ranges); return ret; } int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode, struct dax_device *dax_dev) { struct fuse_conn_dax *fcd; int err; fc->dax_mode = dax_mode; if (!dax_dev) return 0; fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); if (!fcd) return -ENOMEM; spin_lock_init(&fcd->lock); fcd->dev = dax_dev; err = fuse_dax_mem_range_init(fcd); if (err) { kfree(fcd); return err; } fc->dax = fcd; return 0; } bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) { struct fuse_conn *fc = get_fuse_conn_super(sb); fi->dax = NULL; if (fc->dax) { fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); if (!fi->dax) return false; init_rwsem(&fi->dax->sem); fi->dax->tree = RB_ROOT_CACHED; } return true; } static const struct address_space_operations fuse_dax_file_aops = { .direct_IO = noop_direct_IO, .dirty_folio = noop_dirty_folio, }; static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); enum fuse_dax_mode dax_mode = fc->dax_mode; if (dax_mode == FUSE_DAX_NEVER) return false; /* * fc->dax may be NULL in 'inode' mode when filesystem device doesn't * support DAX, in which case it will silently fallback to 'never' mode. */ if (!fc->dax) return false; if (dax_mode == FUSE_DAX_ALWAYS) return true; /* dax_mode is FUSE_DAX_INODE* */ return fc->inode_dax && (flags & FUSE_ATTR_DAX); } void fuse_dax_inode_init(struct inode *inode, unsigned int flags) { if (!fuse_should_enable_dax(inode, flags)) return; inode->i_flags |= S_DAX; inode->i_data.a_ops = &fuse_dax_file_aops; } void fuse_dax_dontcache(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_is_inode_dax_mode(fc->dax_mode) && ((bool) IS_DAX(inode) != (bool) (flags & FUSE_ATTR_DAX))) d_mark_dontcache(inode); } bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) { if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", map_alignment, FUSE_DAX_SZ); return false; } return true; } void fuse_dax_cancel_work(struct fuse_conn *fc) { struct fuse_conn_dax *fcd = fc->dax; if (fcd) cancel_delayed_work_sync(&fcd->free_work); } EXPORT_SYMBOL_GPL(fuse_dax_cancel_work); |
| 1 20 20 20 20 5 5 5 5 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 5 5 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 5 5 5 20 18 18 18 20 20 20 20 5 18 18 7 7 7 7 7 18 5 5 18 5 5 18 18 18 18 18 18 18 5 5 5 5 7 7 7 7 7 5 5 5 5 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/power/snapshot.c * * This file provides system snapshot/restore functionality for swsusp. * * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> */ #define pr_fmt(fmt) "PM: hibernation: " fmt #include <linux/version.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/suspend.h> #include <linux/delay.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/pm.h> #include <linux/device.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/nmi.h> #include <linux/syscalls.h> #include <linux/console.h> #include <linux/highmem.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/compiler.h> #include <linux/ktime.h> #include <linux/set_memory.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> #include <asm/io.h> #include "power.h" #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY) static bool hibernate_restore_protection; static bool hibernate_restore_protection_active; void enable_restore_image_protection(void) { hibernate_restore_protection = true; } static inline void hibernate_restore_protection_begin(void) { hibernate_restore_protection_active = hibernate_restore_protection; } static inline void hibernate_restore_protection_end(void) { hibernate_restore_protection_active = false; } static inline int __must_check hibernate_restore_protect_page(void *page_address) { if (hibernate_restore_protection_active) return set_memory_ro((unsigned long)page_address, 1); return 0; } static inline int hibernate_restore_unprotect_page(void *page_address) { if (hibernate_restore_protection_active) return set_memory_rw((unsigned long)page_address, 1); return 0; } #else static inline void hibernate_restore_protection_begin(void) {} static inline void hibernate_restore_protection_end(void) {} static inline int __must_check hibernate_restore_protect_page(void *page_address) {return 0; } static inline int hibernate_restore_unprotect_page(void *page_address) {return 0; } #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ /* * The calls to set_direct_map_*() should not fail because remapping a page * here means that we only update protection bits in an existing PTE. * It is still worth to have a warning here if something changes and this * will no longer be the case. */ static inline void hibernate_map_page(struct page *page) { if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { int ret = set_direct_map_default_noflush(page); if (ret) pr_warn_once("Failed to remap page\n"); } else { debug_pagealloc_map_pages(page, 1); } } static inline void hibernate_unmap_page(struct page *page) { if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { unsigned long addr = (unsigned long)page_address(page); int ret = set_direct_map_invalid_noflush(page); if (ret) pr_warn_once("Failed to remap page\n"); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } else { debug_pagealloc_unmap_pages(page, 1); } } static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); /* * Number of bytes to reserve for memory allocations made by device drivers * from their ->freeze() and ->freeze_noirq() callbacks so that they don't * cause image creation to fail (tunable via /sys/power/reserved_size). */ unsigned long reserved_size; void __init hibernate_reserved_size_init(void) { reserved_size = SPARE_PAGES * PAGE_SIZE; } /* * Preferred image size in bytes (tunable via /sys/power/image_size). * When it is set to N, swsusp will do its best to ensure the image * size will not exceed N bytes, but if that is impossible, it will * try to create the smallest image possible. */ unsigned long image_size; void __init hibernate_image_size_init(void) { image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE; } /* * List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written * directly to their "original" page frames. */ struct pbe *restore_pblist; /* struct linked_page is used to build chains of pages */ #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) struct linked_page { struct linked_page *next; char data[LINKED_PAGE_DATA_SIZE]; } __packed; /* * List of "safe" pages (ie. pages that were not used by the image kernel * before hibernation) that may be used as temporary storage for image kernel * memory contents. */ static struct linked_page *safe_pages_list; /* Pointer to an auxiliary buffer (1 page) */ static void *buffer; #define PG_ANY 0 #define PG_SAFE 1 #define PG_UNSAFE_CLEAR 1 #define PG_UNSAFE_KEEP 0 static unsigned int allocated_unsafe_pages; /** * get_image_page - Allocate a page for a hibernation image. * @gfp_mask: GFP mask for the allocation. * @safe_needed: Get pages that were not used before hibernation (restore only) * * During image restoration, for storing the PBE list and the image data, we can * only use memory pages that do not conflict with the pages used before * hibernation. The "unsafe" pages have PageNosaveFree set and we count them * using allocated_unsafe_pages. * * Each allocated image page is marked as PageNosave and PageNosaveFree so that * swsusp_free() can release it. */ static void *get_image_page(gfp_t gfp_mask, int safe_needed) { void *res; res = (void *)get_zeroed_page(gfp_mask); if (safe_needed) while (res && swsusp_page_is_free(virt_to_page(res))) { /* The page is unsafe, mark it for swsusp_free() */ swsusp_set_page_forbidden(virt_to_page(res)); allocated_unsafe_pages++; res = (void *)get_zeroed_page(gfp_mask); } if (res) { swsusp_set_page_forbidden(virt_to_page(res)); swsusp_set_page_free(virt_to_page(res)); } return res; } static void *__get_safe_page(gfp_t gfp_mask) { if (safe_pages_list) { void *ret = safe_pages_list; safe_pages_list = safe_pages_list->next; memset(ret, 0, PAGE_SIZE); return ret; } return get_image_page(gfp_mask, PG_SAFE); } unsigned long get_safe_page(gfp_t gfp_mask) { return (unsigned long)__get_safe_page(gfp_mask); } static struct page *alloc_image_page(gfp_t gfp_mask) { struct page *page; page = alloc_page(gfp_mask); if (page) { swsusp_set_page_forbidden(page); swsusp_set_page_free(page); } return page; } static void recycle_safe_page(void *page_address) { struct linked_page *lp = page_address; lp->next = safe_pages_list; safe_pages_list = lp; } /** * free_image_page - Free a page allocated for hibernation image. * @addr: Address of the page to free. * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. * * The page to free should have been allocated by get_image_page() (page flags * set by it are affected). */ static inline void free_image_page(void *addr, int clear_nosave_free) { struct page *page; BUG_ON(!virt_addr_valid(addr)); page = virt_to_page(addr); swsusp_unset_page_forbidden(page); if (clear_nosave_free) swsusp_unset_page_free(page); __free_page(page); } static inline void free_list_of_pages(struct linked_page *list, int clear_page_nosave) { while (list) { struct linked_page *lp = list->next; free_image_page(list, clear_page_nosave); list = lp; } } /* * struct chain_allocator is used for allocating small objects out of * a linked list of pages called 'the chain'. * * The chain grows each time when there is no room for a new object in * the current page. The allocated objects cannot be freed individually. * It is only possible to free them all at once, by freeing the entire * chain. * * NOTE: The chain allocator may be inefficient if the allocated objects * are not much smaller than PAGE_SIZE. */ struct chain_allocator { struct linked_page *chain; /* the chain */ unsigned int used_space; /* total size of objects allocated out of the current page */ gfp_t gfp_mask; /* mask for allocating pages */ int safe_needed; /* if set, only "safe" pages are allocated */ }; static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) { ca->chain = NULL; ca->used_space = LINKED_PAGE_DATA_SIZE; ca->gfp_mask = gfp_mask; ca->safe_needed = safe_needed; } static void *chain_alloc(struct chain_allocator *ca, unsigned int size) { void *ret; if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { struct linked_page *lp; lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) : get_image_page(ca->gfp_mask, PG_ANY); if (!lp) return NULL; lp->next = ca->chain; ca->chain = lp; ca->used_space = 0; } ret = ca->chain->data + ca->used_space; ca->used_space += size; return ret; } /* * Data types related to memory bitmaps. * * Memory bitmap is a structure consisting of many linked lists of * objects. The main list's elements are of type struct zone_bitmap * and each of them corresponds to one zone. For each zone bitmap * object there is a list of objects of type struct bm_block that * represent each blocks of bitmap in which information is stored. * * struct memory_bitmap contains a pointer to the main list of zone * bitmap objects, a struct bm_position used for browsing the bitmap, * and a pointer to the list of pages used for allocating all of the * zone bitmap objects and bitmap block objects. * * NOTE: It has to be possible to lay out the bitmap in memory * using only allocations of order 0. Additionally, the bitmap is * designed to work with arbitrary number of zones (this is over the * top for now, but let's avoid making unnecessary assumptions ;-). * * struct zone_bitmap contains a pointer to a list of bitmap block * objects and a pointer to the bitmap block object that has been * most recently used for setting bits. Additionally, it contains the * PFNs that correspond to the start and end of the represented zone. * * struct bm_block contains a pointer to the memory page in which * information is stored (in the form of a block of bitmap) * It also contains the pfns that correspond to the start and end of * the represented memory area. * * The memory bitmap is organized as a radix tree to guarantee fast random * access to the bits. There is one radix tree for each zone (as returned * from create_mem_extents). * * One radix tree is represented by one struct mem_zone_bm_rtree. There are * two linked lists for the nodes of the tree, one for the inner nodes and * one for the leaf nodes. The linked leaf nodes are used for fast linear * access of the memory bitmap. * * The struct rtree_node represents one node of the radix tree. */ #define BM_END_OF_MAP (~0UL) #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) /* * struct rtree_node is a wrapper struct to link the nodes * of the rtree together for easy linear iteration over * bits and easy freeing */ struct rtree_node { struct list_head list; unsigned long *data; }; /* * struct mem_zone_bm_rtree represents a bitmap used for one * populated memory zone. */ struct mem_zone_bm_rtree { struct list_head list; /* Link Zones together */ struct list_head nodes; /* Radix Tree inner nodes */ struct list_head leaves; /* Radix Tree leaves */ unsigned long start_pfn; /* Zone start page frame */ unsigned long end_pfn; /* Zone end page frame + 1 */ struct rtree_node *rtree; /* Radix Tree Root */ int levels; /* Number of Radix Tree Levels */ unsigned int blocks; /* Number of Bitmap Blocks */ }; /* struct bm_position is used for browsing memory bitmaps */ struct bm_position { struct mem_zone_bm_rtree *zone; struct rtree_node *node; unsigned long node_pfn; unsigned long cur_pfn; int node_bit; }; struct memory_bitmap { struct list_head zones; struct linked_page *p_list; /* list of pages used to store zone bitmap objects and bitmap block objects */ struct bm_position cur; /* most recently used bit position */ }; /* Functions that operate on memory bitmaps */ #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) #if BITS_PER_LONG == 32 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) #else #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) #endif #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) /** * alloc_rtree_node - Allocate a new node and add it to the radix tree. * @gfp_mask: GFP mask for the allocation. * @safe_needed: Get pages not used before hibernation (restore only) * @ca: Pointer to a linked list of pages ("a chain") to allocate from * @list: Radix Tree node to add. * * This function is used to allocate inner nodes as well as the * leave nodes of the radix tree. It also adds the node to the * corresponding linked list passed in by the *list parameter. */ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca, struct list_head *list) { struct rtree_node *node; node = chain_alloc(ca, sizeof(struct rtree_node)); if (!node) return NULL; node->data = get_image_page(gfp_mask, safe_needed); if (!node->data) return NULL; list_add_tail(&node->list, list); return node; } /** * add_rtree_block - Add a new leave node to the radix tree. * * The leave nodes need to be allocated in order to keep the leaves * linked list in order. This is guaranteed by the zone->blocks * counter. */ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca) { struct rtree_node *node, *block, **dst; unsigned int levels_needed, block_nr; int i; block_nr = zone->blocks; levels_needed = 0; /* How many levels do we need for this block nr? */ while (block_nr) { levels_needed += 1; block_nr >>= BM_RTREE_LEVEL_SHIFT; } /* Make sure the rtree has enough levels */ for (i = zone->levels; i < levels_needed; i++) { node = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->nodes); if (!node) return -ENOMEM; node->data[0] = (unsigned long)zone->rtree; zone->rtree = node; zone->levels += 1; } /* Allocate new block */ block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); if (!block) return -ENOMEM; /* Now walk the rtree to insert the block */ node = zone->rtree; dst = &zone->rtree; block_nr = zone->blocks; for (i = zone->levels; i > 0; i--) { int index; if (!node) { node = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->nodes); if (!node) return -ENOMEM; *dst = node; } index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); index &= BM_RTREE_LEVEL_MASK; dst = (struct rtree_node **)&((*dst)->data[index]); node = *dst; } zone->blocks += 1; *dst = block; return 0; } static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free); /** * create_zone_bm_rtree - Create a radix tree for one zone. * * Allocated the mem_zone_bm_rtree structure and initializes it. * This function also allocated and builds the radix tree for the * zone. */ static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca, unsigned long start, unsigned long end) { struct mem_zone_bm_rtree *zone; unsigned int i, nr_blocks; unsigned long pages; pages = end - start; zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); if (!zone) return NULL; INIT_LIST_HEAD(&zone->nodes); INIT_LIST_HEAD(&zone->leaves); zone->start_pfn = start; zone->end_pfn = end; nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); for (i = 0; i < nr_blocks; i++) { if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); return NULL; } } return zone; } /** * free_zone_bm_rtree - Free the memory of the radix tree. * * Free all node pages of the radix tree. The mem_zone_bm_rtree * structure itself is not freed here nor are the rtree_node * structs. */ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free) { struct rtree_node *node; list_for_each_entry(node, &zone->nodes, list) free_image_page(node->data, clear_nosave_free); list_for_each_entry(node, &zone->leaves, list) free_image_page(node->data, clear_nosave_free); } static void memory_bm_position_reset(struct memory_bitmap *bm) { bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, list); bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list); bm->cur.node_pfn = 0; bm->cur.cur_pfn = BM_END_OF_MAP; bm->cur.node_bit = 0; } static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); struct mem_extent { struct list_head hook; unsigned long start; unsigned long end; }; /** * free_mem_extents - Free a list of memory extents. * @list: List of extents to free. */ static void free_mem_extents(struct list_head *list) { struct mem_extent *ext, *aux; list_for_each_entry_safe(ext, aux, list, hook) { list_del(&ext->hook); kfree(ext); } } /** * create_mem_extents - Create a list of memory extents. * @list: List to put the extents into. * @gfp_mask: Mask to use for memory allocations. * * The extents represent contiguous ranges of PFNs. */ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) { struct zone *zone; INIT_LIST_HEAD(list); for_each_populated_zone(zone) { unsigned long zone_start, zone_end; struct mem_extent *ext, *cur, *aux; zone_start = zone->zone_start_pfn; zone_end = zone_end_pfn(zone); list_for_each_entry(ext, list, hook) if (zone_start <= ext->end) break; if (&ext->hook == list || zone_end < ext->start) { /* New extent is necessary */ struct mem_extent *new_ext; new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); if (!new_ext) { free_mem_extents(list); return -ENOMEM; } new_ext->start = zone_start; new_ext->end = zone_end; list_add_tail(&new_ext->hook, &ext->hook); continue; } /* Merge this zone's range of PFNs with the existing one */ if (zone_start < ext->start) ext->start = zone_start; if (zone_end > ext->end) ext->end = zone_end; /* More merging may be possible */ cur = ext; list_for_each_entry_safe_continue(cur, aux, list, hook) { if (zone_end < cur->start) break; if (zone_end < cur->end) ext->end = cur->end; list_del(&cur->hook); kfree(cur); } } return 0; } /** * memory_bm_create - Allocate memory for a memory bitmap. */ static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) { struct chain_allocator ca; struct list_head mem_extents; struct mem_extent *ext; int error; chain_init(&ca, gfp_mask, safe_needed); INIT_LIST_HEAD(&bm->zones); error = create_mem_extents(&mem_extents, gfp_mask); if (error) return error; list_for_each_entry(ext, &mem_extents, hook) { struct mem_zone_bm_rtree *zone; zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, ext->start, ext->end); if (!zone) { error = -ENOMEM; goto Error; } list_add_tail(&zone->list, &bm->zones); } bm->p_list = ca.chain; memory_bm_position_reset(bm); Exit: free_mem_extents(&mem_extents); return error; Error: bm->p_list = ca.chain; memory_bm_free(bm, PG_UNSAFE_CLEAR); goto Exit; } /** * memory_bm_free - Free memory occupied by the memory bitmap. * @bm: Memory bitmap. */ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { struct mem_zone_bm_rtree *zone; list_for_each_entry(zone, &bm->zones, list) free_zone_bm_rtree(zone, clear_nosave_free); free_list_of_pages(bm->p_list, clear_nosave_free); INIT_LIST_HEAD(&bm->zones); } /** * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. * * Find the bit in memory bitmap @bm that corresponds to the given PFN. * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. * * Walk the radix tree to find the page containing the bit that represents @pfn * and return the position of the bit in @addr and @bit_nr. */ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, void **addr, unsigned int *bit_nr) { struct mem_zone_bm_rtree *curr, *zone; struct rtree_node *node; int i, block_nr; zone = bm->cur.zone; if (pfn >= zone->start_pfn && pfn < zone->end_pfn) goto zone_found; zone = NULL; /* Find the right zone */ list_for_each_entry(curr, &bm->zones, list) { if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { zone = curr; break; } } if (!zone) return -EFAULT; zone_found: /* * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ /* * If the zone we wish to scan is the current zone and the * pfn falls into the current node then we do not need to walk * the tree. */ node = bm->cur.node; if (zone == bm->cur.zone && ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; for (i = zone->levels; i > 0; i--) { int index; index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); index &= BM_RTREE_LEVEL_MASK; BUG_ON(node->data[index] == 0); node = (struct rtree_node *)node->data[index]; } node_found: /* Update last position */ bm->cur.zone = zone; bm->cur.node = node; bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; bm->cur.cur_pfn = pfn; /* Set return values */ *addr = node->data; *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; return 0; } static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); set_bit(bit, addr); } static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); if (!error) set_bit(bit, addr); return error; } static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); clear_bit(bit, addr); } static void memory_bm_clear_current(struct memory_bitmap *bm) { int bit; bit = max(bm->cur.node_bit - 1, 0); clear_bit(bit, bm->cur.node->data); } static unsigned long memory_bm_get_current(struct memory_bitmap *bm) { return bm->cur.cur_pfn; } static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); return test_bit(bit, addr); } static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; return !memory_bm_find_bit(bm, pfn, &addr, &bit); } /* * rtree_next_node - Jump to the next leaf node. * * Set the position to the beginning of the next node in the * memory bitmap. This is either the next node in the current * zone's radix tree or the first node in the radix tree of the * next zone. * * Return true if there is a next node, false otherwise. */ static bool rtree_next_node(struct memory_bitmap *bm) { if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { bm->cur.node = list_entry(bm->cur.node->list.next, struct rtree_node, list); bm->cur.node_pfn += BM_BITS_PER_BLOCK; bm->cur.node_bit = 0; touch_softlockup_watchdog(); return true; } /* No more nodes, goto next zone */ if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { bm->cur.zone = list_entry(bm->cur.zone->list.next, struct mem_zone_bm_rtree, list); bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list); bm->cur.node_pfn = 0; bm->cur.node_bit = 0; return true; } /* No more zones */ return false; } /** * memory_bm_next_pfn - Find the next set bit in a memory bitmap. * @bm: Memory bitmap. * * Starting from the last returned position this function searches for the next * set bit in @bm and returns the PFN represented by it. If no more bits are * set, BM_END_OF_MAP is returned. * * It is required to run memory_bm_position_reset() before the first call to * this function for the given memory bitmap. */ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { unsigned long bits, pfn, pages; int bit; do { pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); bit = find_next_bit(bm->cur.node->data, bits, bm->cur.node_bit); if (bit < bits) { pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; bm->cur.node_bit = bit + 1; bm->cur.cur_pfn = pfn; return pfn; } } while (rtree_next_node(bm)); bm->cur.cur_pfn = BM_END_OF_MAP; return BM_END_OF_MAP; } /* * This structure represents a range of page frames the contents of which * should not be saved during hibernation. */ struct nosave_region { struct list_head list; unsigned long start_pfn; unsigned long end_pfn; }; static LIST_HEAD(nosave_regions); static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) { struct rtree_node *node; list_for_each_entry(node, &zone->nodes, list) recycle_safe_page(node->data); list_for_each_entry(node, &zone->leaves, list) recycle_safe_page(node->data); } static void memory_bm_recycle(struct memory_bitmap *bm) { struct mem_zone_bm_rtree *zone; struct linked_page *p_list; list_for_each_entry(zone, &bm->zones, list) recycle_zone_bm_rtree(zone); p_list = bm->p_list; while (p_list) { struct linked_page *lp = p_list; p_list = lp->next; recycle_safe_page(lp); } } /** * register_nosave_region - Register a region of unsaveable memory. * * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; if (start_pfn >= end_pfn) return; if (!list_empty(&nosave_regions)) { /* Try to extend the previous region (they should be sorted) */ region = list_entry(nosave_regions.prev, struct nosave_region, list); if (region->end_pfn == start_pfn) { region->end_pfn = end_pfn; goto Report; } } /* This allocation cannot fail */ region = memblock_alloc_or_panic(sizeof(struct nosave_region), SMP_CACHE_BYTES); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); Report: pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n", (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); } /* * Set bits in this map correspond to the page frames the contents of which * should not be saved during the suspend. */ static struct memory_bitmap *forbidden_pages_map; /* Set bits in this map correspond to free page frames. */ static struct memory_bitmap *free_pages_map; /* * Each page frame allocated for creating the image is marked by setting the * corresponding bits in forbidden_pages_map and free_pages_map simultaneously */ void swsusp_set_page_free(struct page *page) { if (free_pages_map) memory_bm_set_bit(free_pages_map, page_to_pfn(page)); } static int swsusp_page_is_free(struct page *page) { return free_pages_map ? memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; } void swsusp_unset_page_free(struct page *page) { if (free_pages_map) memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); } static void swsusp_set_page_forbidden(struct page *page) { if (forbidden_pages_map) memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); } int swsusp_page_is_forbidden(struct page *page) { return forbidden_pages_map ? memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; } static void swsusp_unset_page_forbidden(struct page *page) { if (forbidden_pages_map) memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); } /** * mark_nosave_pages - Mark pages that should not be saved. * @bm: Memory bitmap. * * Set the bits in @bm that correspond to the page frames the contents of which * should not be saved. */ static void mark_nosave_pages(struct memory_bitmap *bm) { struct nosave_region *region; if (list_empty(&nosave_regions)) return; list_for_each_entry(region, &nosave_regions, list) { unsigned long pfn; pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", (unsigned long long) region->start_pfn << PAGE_SHIFT, ((unsigned long long) region->end_pfn << PAGE_SHIFT) - 1); for_each_valid_pfn(pfn, region->start_pfn, region->end_pfn) { /* * It is safe to ignore the result of * mem_bm_set_bit_check() here, since we won't * touch the PFNs for which the error is * returned anyway. */ mem_bm_set_bit_check(bm, pfn); } } } /** * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. * * Create bitmaps needed for marking page frames that should not be saved and * free page frames. The forbidden_pages_map and free_pages_map pointers are * only modified if everything goes well, because we don't want the bits to be * touched before both bitmaps are set up. */ int create_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; int error; if (forbidden_pages_map && free_pages_map) return 0; else BUG_ON(forbidden_pages_map || free_pages_map); bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) return -ENOMEM; error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); if (error) goto Free_first_object; bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm2) goto Free_first_bitmap; error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); if (error) goto Free_second_object; forbidden_pages_map = bm1; free_pages_map = bm2; mark_nosave_pages(forbidden_pages_map); pr_debug("Basic memory bitmaps created\n"); return 0; Free_second_object: kfree(bm2); Free_first_bitmap: memory_bm_free(bm1, PG_UNSAFE_CLEAR); Free_first_object: kfree(bm1); return -ENOMEM; } /** * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. * * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The * auxiliary pointers are necessary so that the bitmaps themselves are not * referred to while they are being freed. */ void free_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; if (WARN_ON(!(forbidden_pages_map && free_pages_map))) return; bm1 = forbidden_pages_map; bm2 = free_pages_map; forbidden_pages_map = NULL; free_pages_map = NULL; memory_bm_free(bm1, PG_UNSAFE_CLEAR); kfree(bm1); memory_bm_free(bm2, PG_UNSAFE_CLEAR); kfree(bm2); pr_debug("Basic memory bitmaps freed\n"); } static void clear_or_poison_free_page(struct page *page) { if (page_poisoning_enabled_static()) __kernel_poison_pages(page, 1); else if (want_init_on_free()) clear_highpage(page); } void clear_or_poison_free_pages(void) { struct memory_bitmap *bm = free_pages_map; unsigned long pfn; if (WARN_ON(!(free_pages_map))) return; if (page_poisoning_enabled() || want_init_on_free()) { memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); while (pfn != BM_END_OF_MAP) { if (pfn_valid(pfn)) clear_or_poison_free_page(pfn_to_page(pfn)); pfn = memory_bm_next_pfn(bm); } memory_bm_position_reset(bm); pr_info("free pages cleared after restore\n"); } } /** * snapshot_additional_pages - Estimate the number of extra pages needed. * @zone: Memory zone to carry out the computation for. * * Estimate the number of additional pages needed for setting up a hibernation * image data structures for @zone (usually, the returned value is greater than * the exact number). */ unsigned int snapshot_additional_pages(struct zone *zone) { unsigned int rtree, nodes; rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), LINKED_PAGE_DATA_SIZE); while (nodes > 1) { nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); rtree += nodes; } return 2 * rtree; } /* * Touch the watchdog for every WD_PAGE_COUNT pages. */ #define WD_PAGE_COUNT (128*1024) static void mark_free_pages(struct zone *zone) { unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT; unsigned long flags; unsigned int order, t; struct page *page; if (zone_is_empty(zone)) return; spin_lock_irqsave(&zone->lock, flags); max_zone_pfn = zone_end_pfn(zone); for_each_valid_pfn(pfn, zone->zone_start_pfn, max_zone_pfn) { page = pfn_to_page(pfn); if (!--page_count) { touch_nmi_watchdog(); page_count = WD_PAGE_COUNT; } if (page_zone(page) != zone) continue; if (!swsusp_page_is_forbidden(page)) swsusp_unset_page_free(page); } for_each_migratetype_order(order, t) { list_for_each_entry(page, &zone->free_area[order].free_list[t], buddy_list) { unsigned long i; pfn = page_to_pfn(page); for (i = 0; i < (1UL << order); i++) { if (!--page_count) { touch_nmi_watchdog(); page_count = WD_PAGE_COUNT; } swsusp_set_page_free(pfn_to_page(pfn + i)); } } } spin_unlock_irqrestore(&zone->lock, flags); } #ifdef CONFIG_HIGHMEM /** * count_free_highmem_pages - Compute the total number of free highmem pages. * * The returned number is system-wide. */ static unsigned int count_free_highmem_pages(void) { struct zone *zone; unsigned int cnt = 0; for_each_populated_zone(zone) if (is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; } /** * saveable_highmem_page - Check if a highmem page is saveable. * * Determine whether a highmem page should be included in a hibernation image. * * We should save the page if it isn't Nosave or NosaveFree, or Reserved, * and it isn't part of a free chunk of pages. */ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) { struct page *page; if (!pfn_valid(pfn)) return NULL; page = pfn_to_online_page(pfn); if (!page || page_zone(page) != zone) return NULL; BUG_ON(!PageHighMem(page)); if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; if (PageReserved(page) || PageOffline(page)) return NULL; if (page_is_guard(page)) return NULL; return page; } /** * count_highmem_pages - Compute the total number of saveable highmem pages. */ static unsigned int count_highmem_pages(void) { struct zone *zone; unsigned int n = 0; for_each_populated_zone(zone) { unsigned long pfn, max_zone_pfn; if (!is_highmem(zone)) continue; mark_free_pages(zone); max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_highmem_page(zone, pfn)) n++; } return n; } #endif /* CONFIG_HIGHMEM */ /** * saveable_page - Check if the given page is saveable. * * Determine whether a non-highmem page should be included in a hibernation * image. * * We should save the page if it isn't Nosave, and is not in the range * of pages statically defined as 'unsaveable', and it isn't part of * a free chunk of pages. */ static struct page *saveable_page(struct zone *zone, unsigned long pfn) { struct page *page; if (!pfn_valid(pfn)) return NULL; page = pfn_to_online_page(pfn); if (!page || page_zone(page) != zone) return NULL; BUG_ON(PageHighMem(page)); if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; if (PageOffline(page)) return NULL; if (PageReserved(page) && (!kernel_page_present(page) || pfn_is_nosave(pfn))) return NULL; if (page_is_guard(page)) return NULL; return page; } /** * count_data_pages - Compute the total number of saveable non-highmem pages. */ static unsigned int count_data_pages(void) { struct zone *zone; unsigned long pfn, max_zone_pfn; unsigned int n = 0; for_each_populated_zone(zone) { if (is_highmem(zone)) continue; mark_free_pages(zone); max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_page(zone, pfn)) n++; } return n; } /* * This is needed, because copy_page and memcpy are not usable for copying * task structs. Returns true if the page was filled with only zeros, * otherwise false. */ static inline bool do_copy_page(long *dst, long *src) { long z = 0; int n; for (n = PAGE_SIZE / sizeof(long); n; n--) { z |= *src; *dst++ = *src++; } return !z; } /** * safe_copy_page - Copy a page in a safe way. * * Check if the page we are going to copy is marked as present in the kernel * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() * always returns 'true'. Returns true if the page was entirely composed of * zeros, otherwise it will return false. */ static bool safe_copy_page(void *dst, struct page *s_page) { bool zeros_only; if (kernel_page_present(s_page)) { zeros_only = do_copy_page(dst, page_address(s_page)); } else { hibernate_map_page(s_page); zeros_only = do_copy_page(dst, page_address(s_page)); hibernate_unmap_page(s_page); } return zeros_only; } #ifdef CONFIG_HIGHMEM static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) { return is_highmem(zone) ? saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); } static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { struct page *s_page, *d_page; void *src, *dst; bool zeros_only; s_page = pfn_to_page(src_pfn); d_page = pfn_to_page(dst_pfn); if (PageHighMem(s_page)) { src = kmap_local_page(s_page); dst = kmap_local_page(d_page); zeros_only = do_copy_page(dst, src); kunmap_local(dst); kunmap_local(src); } else { if (PageHighMem(d_page)) { /* * The page pointed to by src may contain some kernel * data modified by kmap_atomic() */ zeros_only = safe_copy_page(buffer, s_page); dst = kmap_local_page(d_page); copy_page(dst, buffer); kunmap_local(dst); } else { zeros_only = safe_copy_page(page_address(d_page), s_page); } } return zeros_only; } #else #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { return safe_copy_page(page_address(pfn_to_page(dst_pfn)), pfn_to_page(src_pfn)); } #endif /* CONFIG_HIGHMEM */ /* * Copy data pages will copy all pages into pages pulled from the copy_bm. * If a page was entirely filled with zeros it will be marked in the zero_bm. * * Returns the number of pages copied. */ static unsigned long copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm, struct memory_bitmap *zero_bm) { unsigned long copied_pages = 0; struct zone *zone; unsigned long pfn, copy_pfn; for_each_populated_zone(zone) { unsigned long max_zone_pfn; mark_free_pages(zone); max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (page_is_saveable(zone, pfn)) memory_bm_set_bit(orig_bm, pfn); } memory_bm_position_reset(orig_bm); memory_bm_position_reset(copy_bm); copy_pfn = memory_bm_next_pfn(copy_bm); for (;;) { pfn = memory_bm_next_pfn(orig_bm); if (unlikely(pfn == BM_END_OF_MAP)) break; if (copy_data_page(copy_pfn, pfn)) { memory_bm_set_bit(zero_bm, pfn); /* Use this copy_pfn for a page that is not full of zeros */ continue; } copied_pages++; copy_pfn = memory_bm_next_pfn(copy_bm); } return copied_pages; } /* Total number of image pages */ static unsigned int nr_copy_pages; /* Number of pages needed for saving the original pfns of the image pages */ static unsigned int nr_meta_pages; /* Number of zero pages */ static unsigned int nr_zero_pages; /* * Numbers of normal and highmem page frames allocated for hibernation image * before suspending devices. */ static unsigned int alloc_normal, alloc_highmem; /* * Memory bitmap used for marking saveable pages (during hibernation) or * hibernation image pages (during restore) */ static struct memory_bitmap orig_bm; /* * Memory bitmap used during hibernation for marking allocated page frames that * will contain copies of saveable pages. During restore it is initially used * for marking hibernation image pages, but then the set bits from it are * duplicated in @orig_bm and it is released. On highmem systems it is next * used for marking "safe" highmem pages, but it has to be reinitialized for * this purpose. */ static struct memory_bitmap copy_bm; /* Memory bitmap which tracks which saveable pages were zero filled. */ static struct memory_bitmap zero_bm; /** * swsusp_free - Free pages allocated for hibernation image. * * Image pages are allocated before snapshot creation, so they need to be * released after resume. */ void swsusp_free(void) { unsigned long fb_pfn, fr_pfn; if (!forbidden_pages_map || !free_pages_map) goto out; memory_bm_position_reset(forbidden_pages_map); memory_bm_position_reset(free_pages_map); loop: fr_pfn = memory_bm_next_pfn(free_pages_map); fb_pfn = memory_bm_next_pfn(forbidden_pages_map); /* * Find the next bit set in both bitmaps. This is guaranteed to * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. */ do { if (fb_pfn < fr_pfn) fb_pfn = memory_bm_next_pfn(forbidden_pages_map); if (fr_pfn < fb_pfn) fr_pfn = memory_bm_next_pfn(free_pages_map); } while (fb_pfn != fr_pfn); if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { struct page *page = pfn_to_page(fr_pfn); memory_bm_clear_current(forbidden_pages_map); memory_bm_clear_current(free_pages_map); hibernate_restore_unprotect_page(page_address(page)); __free_page(page); goto loop; } out: nr_copy_pages = 0; nr_meta_pages = 0; nr_zero_pages = 0; restore_pblist = NULL; buffer = NULL; alloc_normal = 0; alloc_highmem = 0; hibernate_restore_protection_end(); } /* Helper functions used for the shrinking of memory. */ #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) /** * preallocate_image_pages - Allocate a number of pages for hibernation image. * @nr_pages: Number of page frames to allocate. * @mask: GFP flags to use for the allocation. * * Return value: Number of page frames actually allocated */ static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) { unsigned long nr_alloc = 0; while (nr_pages > 0) { struct page *page; page = alloc_image_page(mask); if (!page) break; memory_bm_set_bit(©_bm, page_to_pfn(page)); if (PageHighMem(page)) alloc_highmem++; else alloc_normal++; nr_pages--; nr_alloc++; } return nr_alloc; } static unsigned long preallocate_image_memory(unsigned long nr_pages, unsigned long avail_normal) { unsigned long alloc; if (avail_normal <= alloc_normal) return 0; alloc = avail_normal - alloc_normal; if (nr_pages < alloc) alloc = nr_pages; return preallocate_image_pages(alloc, GFP_IMAGE); } #ifdef CONFIG_HIGHMEM static unsigned long preallocate_image_highmem(unsigned long nr_pages) { return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); } /** * __fraction - Compute (an approximation of) x * (multiplier / base). */ static unsigned long __fraction(u64 x, u64 multiplier, u64 base) { return div64_u64(x * multiplier, base); } static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, unsigned long highmem, unsigned long total) { unsigned long alloc = __fraction(nr_pages, highmem, total); return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); } #else /* CONFIG_HIGHMEM */ static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) { return 0; } static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, unsigned long highmem, unsigned long total) { return 0; } #endif /* CONFIG_HIGHMEM */ /** * free_unnecessary_pages - Release preallocated pages not needed for the image. */ static unsigned long free_unnecessary_pages(void) { unsigned long save, to_free_normal, to_free_highmem, free; save = count_data_pages(); if (alloc_normal >= save) { to_free_normal = alloc_normal - save; save = 0; } else { to_free_normal = 0; save -= alloc_normal; } save += count_highmem_pages(); if (alloc_highmem >= save) { to_free_highmem = alloc_highmem - save; } else { to_free_highmem = 0; save -= alloc_highmem; if (to_free_normal > save) to_free_normal -= save; else to_free_normal = 0; } free = to_free_normal + to_free_highmem; memory_bm_position_reset(©_bm); while (to_free_normal > 0 || to_free_highmem > 0) { unsigned long pfn = memory_bm_next_pfn(©_bm); struct page *page = pfn_to_page(pfn); if (PageHighMem(page)) { if (!to_free_highmem) continue; to_free_highmem--; alloc_highmem--; } else { if (!to_free_normal) continue; to_free_normal--; alloc_normal--; } memory_bm_clear_bit(©_bm, pfn); swsusp_unset_page_forbidden(page); swsusp_unset_page_free(page); __free_page(page); } return free; } /** * minimum_image_size - Estimate the minimum acceptable size of an image. * @saveable: Number of saveable pages in the system. * * We want to avoid attempting to free too much memory too hard, so estimate the * minimum acceptable size of a hibernation image to use as the lower limit for * preallocating memory. * * We assume that the minimum image size should be proportional to * * [number of saveable pages] - [number of pages that can be freed in theory] * * where the second term is the sum of (1) reclaimable slab pages, (2) active * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. */ static unsigned long minimum_image_size(unsigned long saveable) { unsigned long size; size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE); return saveable <= size ? 0 : saveable - size; } /** * hibernate_preallocate_memory - Preallocate memory for hibernation image. * * To create a hibernation image it is necessary to make a copy of every page * frame in use. We also need a number of page frames to be free during * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through * /sys/power/reserved_size, respectively). To make this happen, we compute the * total number of available page frames and allocate at least * * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2 * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * * If image_size is set below the number following from the above formula, * the preallocation of memory is continued until the total number of saveable * pages in the system is below the requested image size or the minimum * acceptable image size returned by minimum_image_size(), whichever is greater. */ int hibernate_preallocate_memory(void) { struct zone *zone; unsigned long saveable, size, max_size, count, highmem, pages = 0; unsigned long alloc, save_highmem, pages_highmem, avail_normal; ktime_t start, stop; int error; pr_info("Preallocating image memory\n"); start = ktime_get(); error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); if (error) { pr_err("Cannot allocate original bitmap\n"); goto err_out; } error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); if (error) { pr_err("Cannot allocate copy bitmap\n"); goto err_out; } error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY); if (error) { pr_err("Cannot allocate zero bitmap\n"); goto err_out; } alloc_normal = 0; alloc_highmem = 0; nr_zero_pages = 0; /* Count the number of saveable data pages. */ save_highmem = count_highmem_pages(); saveable = count_data_pages(); /* * Compute the total number of page frames we can use (count) and the * number of pages needed for image metadata (size). */ count = saveable; saveable += save_highmem; highmem = save_highmem; size = 0; for_each_populated_zone(zone) { size += snapshot_additional_pages(zone); if (is_highmem(zone)) highmem += zone_page_state(zone, NR_FREE_PAGES); else count += zone_page_state(zone, NR_FREE_PAGES); } avail_normal = count; count += highmem; count -= totalreserve_pages; /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); /* Compute the desired number of image pages specified by image_size. */ size = DIV_ROUND_UP(image_size, PAGE_SIZE); if (size > max_size) size = max_size; /* * If the desired number of image pages is at least as large as the * current number of saveable pages in memory, allocate page frames for * the image and we're done. */ if (size >= saveable) { pages = preallocate_image_highmem(save_highmem); pages += preallocate_image_memory(saveable - pages, avail_normal); goto out; } /* Estimate the minimum size of the image. */ pages = minimum_image_size(saveable); /* * To avoid excessive pressure on the normal zone, leave room in it to * accommodate an image of the minimum size (unless it's already too * small, in which case don't preallocate pages from it at all). */ if (avail_normal > pages) avail_normal -= pages; else avail_normal = 0; if (size < pages) size = min_t(unsigned long, pages, max_size); /* * Let the memory management subsystem know that we're going to need a * large number of page frames to allocate and make it free some memory. * NOTE: If this is not done, performance will be hurt badly in some * test cases. */ shrink_all_memory(saveable - size); /* * The number of saveable pages in memory was too high, so apply some * pressure to decrease it. First, make room for the largest possible * image and fail if that doesn't work. Next, try to decrease the size * of the image as much as indicated by 'size' using allocations from * highmem and non-highmem zones separately. */ pages_highmem = preallocate_image_highmem(highmem / 2); alloc = count - max_size; if (alloc > pages_highmem) alloc -= pages_highmem; else alloc = 0; pages = preallocate_image_memory(alloc, avail_normal); if (pages < alloc) { /* We have exhausted non-highmem pages, try highmem. */ alloc -= pages; pages += pages_highmem; pages_highmem = preallocate_image_highmem(alloc); if (pages_highmem < alloc) { pr_err("Image allocation is %lu pages short\n", alloc - pages_highmem); goto err_out; } pages += pages_highmem; /* * size is the desired number of saveable pages to leave in * memory, so try to preallocate (all memory - size) pages. */ alloc = (count - pages) - size; pages += preallocate_image_highmem(alloc); } else { /* * There are approximately max_size saveable pages at this point * and we want to reduce this number down to size. */ alloc = max_size - size; size = preallocate_highmem_fraction(alloc, highmem, count); pages_highmem += size; alloc -= size; size = preallocate_image_memory(alloc, avail_normal); pages_highmem += preallocate_image_highmem(alloc - size); pages += pages_highmem + size; } /* * We only need as many page frames for the image as there are saveable * pages in memory, but we have allocated more. Release the excessive * ones now. */ pages -= free_unnecessary_pages(); out: stop = ktime_get(); pr_info("Allocated %lu pages for snapshot\n", pages); swsusp_show_speed(start, stop, pages, "Allocated"); return 0; err_out: swsusp_free(); return -ENOMEM; } #ifdef CONFIG_HIGHMEM /** * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. * * Compute the number of non-highmem pages that will be necessary for creating * copies of highmem pages. */ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; if (free_highmem >= nr_highmem) nr_highmem = 0; else nr_highmem -= free_highmem; return nr_highmem; } #else static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } #endif /* CONFIG_HIGHMEM */ /** * enough_free_mem - Check if there is enough free memory for the image. */ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; unsigned int free = alloc_normal; for_each_populated_zone(zone) if (!is_highmem(zone)) free += zone_page_state(zone, NR_FREE_PAGES); nr_pages += count_pages_for_highmem(nr_highmem); pr_debug("Normal pages needed: %u + %u, available pages: %u\n", nr_pages, PAGES_FOR_IO, free); return free > nr_pages + PAGES_FOR_IO; } #ifdef CONFIG_HIGHMEM /** * get_highmem_buffer - Allocate a buffer for highmem pages. * * If there are some highmem pages in the hibernation image, we may need a * buffer to copy them and/or load their data. */ static inline int get_highmem_buffer(int safe_needed) { buffer = get_image_page(GFP_ATOMIC, safe_needed); return buffer ? 0 : -ENOMEM; } /** * alloc_highmem_pages - Allocate some highmem pages for the image. * * Try to allocate as many pages as needed, but if the number of free highmem * pages is less than that, allocate them all. */ static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) { unsigned int to_alloc = count_free_highmem_pages(); if (to_alloc > nr_highmem) to_alloc = nr_highmem; nr_highmem -= to_alloc; while (to_alloc-- > 0) { struct page *page; page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); memory_bm_set_bit(bm, page_to_pfn(page)); } return nr_highmem; } #else static inline int get_highmem_buffer(int safe_needed) { return 0; } static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } #endif /* CONFIG_HIGHMEM */ /** * swsusp_alloc - Allocate memory for hibernation image. * * We first try to allocate as many highmem pages as there are * saveable highmem pages in the system. If that fails, we allocate * non-highmem pages for the copies of the remaining highmem ones. * * In this approach it is likely that the copies of highmem pages will * also be located in the high memory, because of the way in which * copy_data_pages() works. */ static int swsusp_alloc(struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { if (nr_highmem > 0) { if (get_highmem_buffer(PG_ANY)) goto err_out; if (nr_highmem > alloc_highmem) { nr_highmem -= alloc_highmem; nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); } } if (nr_pages > alloc_normal) { nr_pages -= alloc_normal; while (nr_pages-- > 0) { struct page *page; page = alloc_image_page(GFP_ATOMIC); if (!page) goto err_out; memory_bm_set_bit(copy_bm, page_to_pfn(page)); } } return 0; err_out: swsusp_free(); return -ENOMEM; } asmlinkage __visible int swsusp_save(void) { unsigned int nr_pages, nr_highmem; pr_info("Creating image:\n"); drain_local_pages(NULL); nr_pages = count_data_pages(); nr_highmem = count_highmem_pages(); pr_info("Need to copy %u pages\n", nr_pages + nr_highmem); if (!enough_free_mem(nr_pages, nr_highmem)) { pr_err("Not enough free memory\n"); return -ENOMEM; } if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { pr_err("Memory allocation failed\n"); return -ENOMEM; } /* * During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(NULL); nr_copy_pages = copy_data_pages(©_bm, &orig_bm, &zero_bm); /* * End of critical section. From now on, we can write to memory, * but we should not touch disk. This specially means we must _not_ * touch swap space! Except we must write out our image of course. */ nr_pages += nr_highmem; /* We don't actually copy the zero pages */ nr_zero_pages = nr_pages - nr_copy_pages; nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages); return 0; } #ifndef CONFIG_ARCH_HIBERNATION_HEADER static int init_header_complete(struct swsusp_info *info) { memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); info->version_code = LINUX_VERSION_CODE; return 0; } static const char *check_image_kernel(struct swsusp_info *info) { if (info->version_code != LINUX_VERSION_CODE) return "kernel version"; if (strcmp(info->uts.sysname, init_utsname()->sysname)) return "system type"; if (strcmp(info->uts.release, init_utsname()->release)) return "kernel release"; if (strcmp(info->uts.version, init_utsname()->version)) return "version"; if (strcmp(info->uts.machine, init_utsname()->machine)) return "machine"; return NULL; } #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ unsigned long snapshot_get_image_size(void) { return nr_copy_pages + nr_meta_pages + 1; } static int init_header(struct swsusp_info *info) { memset(info, 0, sizeof(struct swsusp_info)); info->num_physpages = get_num_physpages(); info->image_pages = nr_copy_pages; info->pages = snapshot_get_image_size(); info->size = info->pages; info->size <<= PAGE_SHIFT; return init_header_complete(info); } #define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1)) #define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG) /** * pack_pfns - Prepare PFNs for saving. * @bm: Memory bitmap. * @buf: Memory buffer to store the PFNs in. * @zero_bm: Memory bitmap containing PFNs of zero pages. * * PFNs corresponding to set bits in @bm are stored in the area of memory * pointed to by @buf (1 page at a time). Pages which were filled with only * zeros will have the highest bit set in the packed format to distinguish * them from PFNs which will be contained in the image file. */ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm, struct memory_bitmap *zero_bm) { int j; for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; if (memory_bm_test_bit(zero_bm, buf[j])) buf[j] |= ENCODED_PFN_ZERO_FLAG; } } /** * snapshot_read_next - Get the address to read the next image page from. * @handle: Snapshot handle to be used for the reading. * * On the first call, @handle should point to a zeroed snapshot_handle * structure. The structure gets populated then and a pointer to it should be * passed to this function every next time. * * On success, the function returns a positive number. Then, the caller * is allowed to read up to the returned number of bytes from the memory * location computed by the data_of() macro. * * The function returns 0 to indicate the end of the data stream condition, * and negative numbers are returned on errors. If that happens, the structure * pointed to by @handle is not updated and should not be used any more. */ int snapshot_read_next(struct snapshot_handle *handle) { if (handle->cur > nr_meta_pages + nr_copy_pages) return 0; if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ buffer = get_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } if (!handle->cur) { int error; error = init_header((struct swsusp_info *)buffer); if (error) return error; handle->buffer = buffer; memory_bm_position_reset(&orig_bm); memory_bm_position_reset(©_bm); } else if (handle->cur <= nr_meta_pages) { clear_page(buffer); pack_pfns(buffer, &orig_bm, &zero_bm); } else { struct page *page; page = pfn_to_page(memory_bm_next_pfn(©_bm)); if (PageHighMem(page)) { /* * Highmem pages are copied to the buffer, * because we can't return with a kmapped * highmem page (we may not be called again). */ void *kaddr; kaddr = kmap_local_page(page); copy_page(buffer, kaddr); kunmap_local(kaddr); handle->buffer = buffer; } else { handle->buffer = page_address(page); } } handle->cur++; return PAGE_SIZE; } static void duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) { unsigned long pfn; memory_bm_position_reset(src); pfn = memory_bm_next_pfn(src); while (pfn != BM_END_OF_MAP) { memory_bm_set_bit(dst, pfn); pfn = memory_bm_next_pfn(src); } } /** * mark_unsafe_pages - Mark pages that were used before hibernation. * * Mark the pages that cannot be used for storing the image during restoration, * because they conflict with the pages that had been used before hibernation. */ static void mark_unsafe_pages(struct memory_bitmap *bm) { unsigned long pfn; /* Clear the "free"/"unsafe" bit for all PFNs */ memory_bm_position_reset(free_pages_map); pfn = memory_bm_next_pfn(free_pages_map); while (pfn != BM_END_OF_MAP) { memory_bm_clear_current(free_pages_map); pfn = memory_bm_next_pfn(free_pages_map); } /* Mark pages that correspond to the "original" PFNs as "unsafe" */ duplicate_memory_bitmap(free_pages_map, bm); allocated_unsafe_pages = 0; } static int check_header(struct swsusp_info *info) { const char *reason; reason = check_image_kernel(info); if (!reason && info->num_physpages != get_num_physpages()) reason = "memory size"; if (reason) { pr_err("Image mismatch: %s\n", reason); return -EPERM; } return 0; } /** * load_header - Check the image header and copy the data from it. */ static int load_header(struct swsusp_info *info) { int error; restore_pblist = NULL; error = check_header(info); if (!error) { nr_copy_pages = info->image_pages; nr_meta_pages = info->pages - info->image_pages - 1; } return error; } /** * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. * @bm: Memory bitmap. * @buf: Area of memory containing the PFNs. * @zero_bm: Memory bitmap with the zero PFNs marked. * * For each element of the array pointed to by @buf (1 page at a time), set the * corresponding bit in @bm. If the page was originally populated with only * zeros then a corresponding bit will also be set in @zero_bm. */ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm, struct memory_bitmap *zero_bm) { unsigned long decoded_pfn; bool zero; int j; for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { if (unlikely(buf[j] == BM_END_OF_MAP)) break; zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG); decoded_pfn = buf[j] & ENCODED_PFN_MASK; if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) { memory_bm_set_bit(bm, decoded_pfn); if (zero) { memory_bm_set_bit(zero_bm, decoded_pfn); nr_zero_pages++; } } else { if (!pfn_valid(decoded_pfn)) pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n", (unsigned long long)PFN_PHYS(decoded_pfn)); return -EFAULT; } } return 0; } #ifdef CONFIG_HIGHMEM /* * struct highmem_pbe is used for creating the list of highmem pages that * should be restored atomically during the resume from disk, because the page * frames they have occupied before the suspend are in use. */ struct highmem_pbe { struct page *copy_page; /* data is here now */ struct page *orig_page; /* data was here before the suspend */ struct highmem_pbe *next; }; /* * List of highmem PBEs needed for restoring the highmem pages that were * allocated before the suspend and included in the suspend image, but have * also been allocated by the "resume" kernel, so their contents cannot be * written directly to their "original" page frames. */ static struct highmem_pbe *highmem_pblist; /** * count_highmem_image_pages - Compute the number of highmem pages in the image. * @bm: Memory bitmap. * * The bits in @bm that correspond to image pages are assumed to be set. */ static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { unsigned long pfn; unsigned int cnt = 0; memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); while (pfn != BM_END_OF_MAP) { if (PageHighMem(pfn_to_page(pfn))) cnt++; pfn = memory_bm_next_pfn(bm); } return cnt; } static unsigned int safe_highmem_pages; static struct memory_bitmap *safe_highmem_bm; /** * prepare_highmem_image - Allocate memory for loading highmem data from image. * @bm: Pointer to an uninitialized memory bitmap structure. * @nr_highmem_p: Pointer to the number of highmem image pages. * * Try to allocate as many highmem pages as there are highmem image pages * (@nr_highmem_p points to the variable containing the number of highmem image * pages). The pages that are "safe" (ie. will not be overwritten when the * hibernation image is restored entirely) have the corresponding bits set in * @bm (it must be uninitialized). * * NOTE: This function should not be called if there are no highmem image pages. */ static int prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) { unsigned int to_alloc; if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) return -ENOMEM; if (get_highmem_buffer(PG_SAFE)) return -ENOMEM; to_alloc = count_free_highmem_pages(); if (to_alloc > *nr_highmem_p) to_alloc = *nr_highmem_p; else *nr_highmem_p = to_alloc; safe_highmem_pages = 0; while (to_alloc-- > 0) { struct page *page; page = alloc_page(__GFP_HIGHMEM); if (!swsusp_page_is_free(page)) { /* The page is "safe", set its bit the bitmap */ memory_bm_set_bit(bm, page_to_pfn(page)); safe_highmem_pages++; } /* Mark the page as allocated */ swsusp_set_page_forbidden(page); swsusp_set_page_free(page); } memory_bm_position_reset(bm); safe_highmem_bm = bm; return 0; } static struct page *last_highmem_page; /** * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. * * For a given highmem image page get a buffer that suspend_write_next() should * return to its caller to write to. * * If the page is to be saved to its "original" page frame or a copy of * the page is to be made in the highmem, @buffer is returned. Otherwise, * the copy of the page is to be made in normal memory, so the address of * the copy is returned. * * If @buffer is returned, the caller of suspend_write_next() will write * the page's contents to @buffer, so they will have to be copied to the * right location on the next call to suspend_write_next() and it is done * with the help of copy_last_highmem_page(). For this purpose, if * @buffer is returned, @last_highmem_page is set to the page to which * the data will have to be copied from @buffer. */ static void *get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) { struct highmem_pbe *pbe; void *kaddr; if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { /* * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ last_highmem_page = page; return buffer; } /* * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); if (!pbe) { swsusp_free(); return ERR_PTR(-ENOMEM); } pbe->orig_page = page; if (safe_highmem_pages > 0) { struct page *tmp; /* Copy of the page will be stored in high memory */ kaddr = buffer; tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); safe_highmem_pages--; last_highmem_page = tmp; pbe->copy_page = tmp; } else { /* Copy of the page will be stored in normal memory */ kaddr = __get_safe_page(ca->gfp_mask); if (!kaddr) return ERR_PTR(-ENOMEM); pbe->copy_page = virt_to_page(kaddr); } pbe->next = highmem_pblist; highmem_pblist = pbe; return kaddr; } /** * copy_last_highmem_page - Copy most the most recent highmem image page. * * Copy the contents of a highmem image from @buffer, where the caller of * snapshot_write_next() has stored them, to the right location represented by * @last_highmem_page . */ static void copy_last_highmem_page(void) { if (last_highmem_page) { void *dst; dst = kmap_local_page(last_highmem_page); copy_page(dst, buffer); kunmap_local(dst); last_highmem_page = NULL; } } static inline int last_highmem_page_copied(void) { return !last_highmem_page; } static inline void free_highmem_data(void) { if (safe_highmem_bm) memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); if (buffer) free_image_page(buffer, PG_UNSAFE_CLEAR); } #else static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } static inline int prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) { return 0; } static inline void *get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) { return ERR_PTR(-EINVAL); } static inline void copy_last_highmem_page(void) {} static inline int last_highmem_page_copied(void) { return 1; } static inline void free_highmem_data(void) {} #endif /* CONFIG_HIGHMEM */ #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) /** * prepare_image - Make room for loading hibernation image. * @new_bm: Uninitialized memory bitmap structure. * @bm: Memory bitmap with unsafe pages marked. * @zero_bm: Memory bitmap containing the zero pages. * * Use @bm to mark the pages that will be overwritten in the process of * restoring the system memory state from the suspend image ("unsafe" pages) * and allocate memory for the image. * * The idea is to allocate a new memory bitmap first and then allocate * as many pages as needed for image data, but without specifying what those * pages will be used for just yet. Instead, we mark them all as allocated and * create a lists of "safe" pages to be used later. On systems with high * memory a list of "safe" highmem pages is created too. * * Because it was not known which pages were unsafe when @zero_bm was created, * make a copy of it and recreate it within safe pages. */ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, struct memory_bitmap *zero_bm) { unsigned int nr_pages, nr_highmem; struct memory_bitmap tmp; struct linked_page *lp; int error; /* If there is no highmem, the buffer will not be necessary */ free_image_page(buffer, PG_UNSAFE_CLEAR); buffer = NULL; nr_highmem = count_highmem_image_pages(bm); mark_unsafe_pages(bm); error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); if (error) goto Free; duplicate_memory_bitmap(new_bm, bm); memory_bm_free(bm, PG_UNSAFE_KEEP); /* Make a copy of zero_bm so it can be created in safe pages */ error = memory_bm_create(&tmp, GFP_ATOMIC, PG_SAFE); if (error) goto Free; duplicate_memory_bitmap(&tmp, zero_bm); memory_bm_free(zero_bm, PG_UNSAFE_KEEP); /* Recreate zero_bm in safe pages */ error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE); if (error) goto Free; duplicate_memory_bitmap(zero_bm, &tmp); memory_bm_free(&tmp, PG_UNSAFE_CLEAR); /* At this point zero_bm is in safe pages and it can be used for restoring. */ if (nr_highmem > 0) { error = prepare_highmem_image(bm, &nr_highmem); if (error) goto Free; } /* * Reserve some safe pages for potential later use. * * NOTE: This way we make sure there will be enough safe pages for the * chain_alloc() in get_buffer(). It is a bit wasteful, but * nr_copy_pages cannot be greater than 50% of the memory anyway. * * nr_copy_pages cannot be less than allocated_unsafe_pages too. */ nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { lp = get_image_page(GFP_ATOMIC, PG_SAFE); if (!lp) { error = -ENOMEM; goto Free; } lp->next = safe_pages_list; safe_pages_list = lp; nr_pages--; } /* Preallocate memory for the image */ nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); if (!lp) { error = -ENOMEM; goto Free; } if (!swsusp_page_is_free(virt_to_page(lp))) { /* The page is "safe", add it to the list */ lp->next = safe_pages_list; safe_pages_list = lp; } /* Mark the page as allocated */ swsusp_set_page_forbidden(virt_to_page(lp)); swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } return 0; Free: swsusp_free(); return error; } /** * get_buffer - Get the address to store the next image data page. * * Get the address that snapshot_write_next() should return to its caller to * write to. */ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { struct pbe *pbe; struct page *page; unsigned long pfn = memory_bm_next_pfn(bm); if (pfn == BM_END_OF_MAP) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); if (PageHighMem(page)) return get_highmem_page_buffer(page, ca); if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) /* * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ return page_address(page); /* * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct pbe)); if (!pbe) { swsusp_free(); return ERR_PTR(-ENOMEM); } pbe->orig_address = page_address(page); pbe->address = __get_safe_page(ca->gfp_mask); if (!pbe->address) return ERR_PTR(-ENOMEM); pbe->next = restore_pblist; restore_pblist = pbe; return pbe->address; } /** * snapshot_write_next - Get the address to store the next image page. * @handle: Snapshot handle structure to guide the writing. * * On the first call, @handle should point to a zeroed snapshot_handle * structure. The structure gets populated then and a pointer to it should be * passed to this function every next time. * * On success, the function returns a positive number. Then, the caller * is allowed to write up to the returned number of bytes to the memory * location computed by the data_of() macro. * * The function returns 0 to indicate the "end of file" condition. Negative * numbers are returned on errors, in which cases the structure pointed to by * @handle is not updated and should not be used any more. */ int snapshot_write_next(struct snapshot_handle *handle) { static struct chain_allocator ca; int error; next: /* Check if we have already loaded the entire image */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) return 0; if (!handle->cur) { if (!buffer) /* This makes the buffer be freed by swsusp_free() */ buffer = get_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; handle->buffer = buffer; } else if (handle->cur == 1) { error = load_header(buffer); if (error) return error; safe_pages_list = NULL; error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); if (error) return error; error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY); if (error) return error; nr_zero_pages = 0; hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm, &zero_bm); if (error) return error; if (handle->cur == nr_meta_pages + 1) { error = prepare_image(&orig_bm, ©_bm, &zero_bm); if (error) return error; chain_init(&ca, GFP_ATOMIC, PG_SAFE); memory_bm_position_reset(&orig_bm); memory_bm_position_reset(&zero_bm); restore_pblist = NULL; handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); } } else { copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); if (error) return error; handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); } handle->sync_read = (handle->buffer == buffer); handle->cur++; /* Zero pages were not included in the image, memset it and move on. */ if (handle->cur > nr_meta_pages + 1 && memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) { memset(handle->buffer, 0, PAGE_SIZE); goto next; } return PAGE_SIZE; } /** * snapshot_write_finalize - Complete the loading of a hibernation image. * * Must be called after the last call to snapshot_write_next() in case the last * page in the image happens to be a highmem page and its contents should be * stored in highmem. Additionally, it recycles bitmap memory that's not * necessary any more. */ int snapshot_write_finalize(struct snapshot_handle *handle) { int error; copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) { memory_bm_recycle(&orig_bm); free_highmem_data(); } return error; } int snapshot_image_loaded(struct snapshot_handle *handle) { return !(!nr_copy_pages || !last_highmem_page_copied() || handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages); } #ifdef CONFIG_HIGHMEM /* Assumes that @buf is ready and points to a "safe" page */ static inline void swap_two_pages_data(struct page *p1, struct page *p2, void *buf) { void *kaddr1, *kaddr2; kaddr1 = kmap_local_page(p1); kaddr2 = kmap_local_page(p2); copy_page(buf, kaddr1); copy_page(kaddr1, kaddr2); copy_page(kaddr2, buf); kunmap_local(kaddr2); kunmap_local(kaddr1); } /** * restore_highmem - Put highmem image pages into their original locations. * * For each highmem page that was in use before hibernation and is included in * the image, and also has been allocated by the "restore" kernel, swap its * current contents with the previous (ie. "before hibernation") ones. * * If the restore eventually fails, we can call this function once again and * restore the highmem state as seen by the restore kernel. */ int restore_highmem(void) { struct highmem_pbe *pbe = highmem_pblist; void *buf; if (!pbe) return 0; buf = get_image_page(GFP_ATOMIC, PG_SAFE); if (!buf) return -ENOMEM; while (pbe) { swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); pbe = pbe->next; } free_image_page(buf, PG_UNSAFE_CLEAR); return 0; } #endif /* CONFIG_HIGHMEM */ |
| 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 | // SPDX-License-Identifier: GPL-2.0-or-later /* AFS security handling * * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/ctype.h> #include <linux/sched.h> #include <linux/hashtable.h> #include <keys/rxrpc-type.h> #include "internal.h" static DEFINE_HASHTABLE(afs_permits_cache, 10); static DEFINE_SPINLOCK(afs_permits_lock); static DEFINE_MUTEX(afs_key_lock); /* * Allocate a key to use as a placeholder for anonymous user security. */ static int afs_alloc_anon_key(struct afs_cell *cell) { struct key *key; mutex_lock(&afs_key_lock); key = cell->anonymous_key; if (!key) { key = rxrpc_get_null_key(cell->key_desc); if (!IS_ERR(key)) cell->anonymous_key = key; } mutex_unlock(&afs_key_lock); if (IS_ERR(key)) return PTR_ERR(key); _debug("anon key %p{%x}", cell->anonymous_key, key_serial(cell->anonymous_key)); return 0; } /* * get a key */ struct key *afs_request_key(struct afs_cell *cell) { struct key *key; int ret; _enter("{%s}", cell->key_desc); _debug("key %s", cell->key_desc); key = request_key_net(&key_type_rxrpc, cell->key_desc, cell->net->net, NULL); if (IS_ERR(key)) { if (PTR_ERR(key) != -ENOKEY) { _leave(" = %ld", PTR_ERR(key)); return key; } if (!cell->anonymous_key) { ret = afs_alloc_anon_key(cell); if (ret < 0) return ERR_PTR(ret); } /* act as anonymous user */ _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); return key_get(cell->anonymous_key); } else { /* act as authorised user */ _leave(" = {%x} [auth]", key_serial(key)); return key; } } /* * Get a key when pathwalk is in rcuwalk mode. */ struct key *afs_request_key_rcu(struct afs_cell *cell) { struct key *key; _enter("{%s}", cell->key_desc); _debug("key %s", cell->key_desc); key = request_key_net_rcu(&key_type_rxrpc, cell->key_desc, cell->net->net); if (IS_ERR(key)) { if (PTR_ERR(key) != -ENOKEY) { _leave(" = %ld", PTR_ERR(key)); return key; } /* act as anonymous user */ if (!cell->anonymous_key) return NULL; /* Need to allocate */ _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); return key_get(cell->anonymous_key); } else { /* act as authorised user */ _leave(" = {%x} [auth]", key_serial(key)); return key; } } /* * Dispose of a list of permits. */ static void afs_permits_rcu(struct rcu_head *rcu) { struct afs_permits *permits = container_of(rcu, struct afs_permits, rcu); int i; for (i = 0; i < permits->nr_permits; i++) key_put(permits->permits[i].key); kfree(permits); } /* * Discard a permission cache. */ void afs_put_permits(struct afs_permits *permits) { if (permits && refcount_dec_and_test(&permits->usage)) { spin_lock(&afs_permits_lock); hash_del_rcu(&permits->hash_node); spin_unlock(&afs_permits_lock); call_rcu(&permits->rcu, afs_permits_rcu); } } /* * Clear a permit cache on callback break. */ void afs_clear_permits(struct afs_vnode *vnode) { struct afs_permits *permits; spin_lock(&vnode->lock); permits = rcu_dereference_protected(vnode->permit_cache, lockdep_is_held(&vnode->lock)); RCU_INIT_POINTER(vnode->permit_cache, NULL); spin_unlock(&vnode->lock); afs_put_permits(permits); } /* * Hash a list of permits. Use simple addition to make it easy to add an extra * one at an as-yet indeterminate position in the list. */ static void afs_hash_permits(struct afs_permits *permits) { unsigned long h = permits->nr_permits; int i; for (i = 0; i < permits->nr_permits; i++) { h += (unsigned long)permits->permits[i].key / sizeof(void *); h += permits->permits[i].access; } permits->h = h; } /* * Cache the CallerAccess result obtained from doing a fileserver operation * that returned a vnode status for a particular key. If a callback break * occurs whilst the operation was in progress then we have to ditch the cache * as the ACL *may* have changed. */ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, unsigned int cb_break, struct afs_status_cb *scb) { struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL; afs_access_t caller_access = scb->status.caller_access; size_t size = 0; bool changed = false; int i, j; _enter("{%llx:%llu},%x,%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); rcu_read_lock(); /* Check for the common case first: We got back the same access as last * time we tried and already have it recorded. */ permits = rcu_dereference(vnode->permit_cache); if (permits) { if (!permits->invalidated) { for (i = 0; i < permits->nr_permits; i++) { if (permits->permits[i].key < key) continue; if (permits->permits[i].key > key) break; if (permits->permits[i].access != caller_access) { changed = true; break; } if (afs_cb_is_broken(cb_break, vnode)) { changed = true; break; } /* The cache is still good. */ rcu_read_unlock(); return; } } changed |= permits->invalidated; size = permits->nr_permits; /* If this set of permits is now wrong, clear the permits * pointer so that no one tries to use the stale information. */ if (changed) { spin_lock(&vnode->lock); if (permits != rcu_access_pointer(vnode->permit_cache)) goto someone_else_changed_it_unlock; RCU_INIT_POINTER(vnode->permit_cache, NULL); spin_unlock(&vnode->lock); afs_put_permits(permits); permits = NULL; size = 0; } } if (afs_cb_is_broken(cb_break, vnode)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to * drop the lock to do memory allocation. */ if (permits && !refcount_inc_not_zero(&permits->usage)) goto someone_else_changed_it; rcu_read_unlock(); /* Speculatively create a new list with the revised permission set. We * discard this if we find an extant match already in the hash, but * it's easier to compare with memcmp this way. * * We fill in the key pointers at this time, but we don't get the refs * yet. */ size++; new = kzalloc(struct_size(new, permits, size), GFP_NOFS); if (!new) goto out_put; refcount_set(&new->usage, 1); new->nr_permits = size; i = j = 0; if (permits) { for (i = 0; i < permits->nr_permits; i++) { if (j == i && permits->permits[i].key > key) { new->permits[j].key = key; new->permits[j].access = caller_access; j++; } new->permits[j].key = permits->permits[i].key; new->permits[j].access = permits->permits[i].access; j++; } } if (j == i) { new->permits[j].key = key; new->permits[j].access = caller_access; } afs_hash_permits(new); /* Now see if the permit list we want is actually already available */ spin_lock(&afs_permits_lock); hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) { if (xpermits->h != new->h || xpermits->invalidated || xpermits->nr_permits != new->nr_permits || memcmp(xpermits->permits, new->permits, new->nr_permits * sizeof(struct afs_permit)) != 0) continue; if (refcount_inc_not_zero(&xpermits->usage)) { replacement = xpermits; goto found; } break; } for (i = 0; i < new->nr_permits; i++) key_get(new->permits[i].key); hash_add_rcu(afs_permits_cache, &new->hash_node, new->h); replacement = new; new = NULL; found: spin_unlock(&afs_permits_lock); kfree(new); rcu_read_lock(); spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); if (!afs_cb_is_broken(cb_break, vnode) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else zap = replacement; spin_unlock(&vnode->lock); rcu_read_unlock(); afs_put_permits(zap); out_put: afs_put_permits(permits); return; someone_else_changed_it_unlock: spin_unlock(&vnode->lock); someone_else_changed_it: /* Someone else changed the cache under us - don't recheck at this * time. */ rcu_read_unlock(); return; } static bool afs_check_permit_rcu(struct afs_vnode *vnode, struct key *key, afs_access_t *_access) { const struct afs_permits *permits; int i; _enter("{%llx:%llu},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); /* check the permits to see if we've got one yet */ if (key == vnode->volume->cell->anonymous_key) { *_access = vnode->status.anon_access; _leave(" = t [anon %x]", *_access); return true; } permits = rcu_dereference(vnode->permit_cache); if (permits) { for (i = 0; i < permits->nr_permits; i++) { if (permits->permits[i].key < key) continue; if (permits->permits[i].key > key) break; *_access = permits->permits[i].access; _leave(" = %u [perm %x]", !permits->invalidated, *_access); return !permits->invalidated; } } _leave(" = f"); return false; } /* * check with the fileserver to see if the directory or parent directory is * permitted to be accessed with this authorisation, and if so, what access it * is granted */ int afs_check_permit(struct afs_vnode *vnode, struct key *key, afs_access_t *_access) { struct afs_permits *permits; bool valid = false; int i, ret; _enter("{%llx:%llu},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); /* check the permits to see if we've got one yet */ if (key == vnode->volume->cell->anonymous_key) { _debug("anon"); *_access = vnode->status.anon_access; valid = true; } else { rcu_read_lock(); permits = rcu_dereference(vnode->permit_cache); if (permits) { for (i = 0; i < permits->nr_permits; i++) { if (permits->permits[i].key < key) continue; if (permits->permits[i].key > key) break; *_access = permits->permits[i].access; valid = !permits->invalidated; break; } } rcu_read_unlock(); } if (!valid) { /* Check the status on the file we're actually interested in * (the post-processing will cache the result). */ _debug("no valid permit"); ret = afs_fetch_status(vnode, key, false, _access); if (ret < 0) { *_access = 0; _leave(" = %d", ret); return ret; } } _leave(" = 0 [access %x]", *_access); return 0; } /* * check the permissions on an AFS file * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ int afs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t access; struct key *key; int ret = 0; _enter("{{%llx:%llu},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); if (mask & MAY_NOT_BLOCK) { key = afs_request_key_rcu(vnode->volume->cell); if (IS_ERR_OR_NULL(key)) return -ECHILD; ret = -ECHILD; if (!afs_check_validity(vnode) || !afs_check_permit_rcu(vnode, key, &access)) goto error; } else { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { _leave(" = %ld [key]", PTR_ERR(key)); return PTR_ERR(key); } ret = afs_validate(vnode, key); if (ret < 0) goto error; /* check the permits to see if we've got one yet */ ret = afs_check_permit(vnode, key, &access); if (ret < 0) goto error; } /* interpret the access mask */ _debug("REQ %x ACC %x on %s", mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); ret = 0; if (S_ISDIR(inode->i_mode)) { if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; } if (mask & MAY_WRITE) { if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ goto permission_denied; } } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; if (!(inode->i_mode & S_IRUSR)) goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; if (!(inode->i_mode & S_IWUSR)) goto permission_denied; } } key_put(key); _leave(" = %d", ret); return ret; permission_denied: ret = -EACCES; error: key_put(key); _leave(" = %d", ret); return ret; } void __exit afs_clean_up_permit_cache(void) { int i; for (i = 0; i < HASH_SIZE(afs_permits_cache); i++) WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i])); } |
| 23 3 9 8 8 8 8 8 1 8 9 1 3 8 113 91 28 24 14 4 8 12 8 12 12 14 11 18 20 15 15 3 9 11 8 8 1 1 11 14 35 9 9 8 8 8 8 8 1 8 8 42 8 9 8 9 8 34 34 3 3 3 3 4 4 7 1 5 7 5 6 7 2 2 9 115 115 9 96 115 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 | // SPDX-License-Identifier: GPL-2.0-or-later /* * V4L2 controls framework control definitions. * * Copyright (C) 2010-2021 Hans Verkuil <hverkuil@kernel.org> */ #include <linux/export.h> #include <media/v4l2-ctrls.h> /* * Returns NULL or a character pointer array containing the menu for * the given control ID. The pointer array ends with a NULL pointer. * An empty string signifies a menu entry that is invalid. This allows * drivers to disable certain options if it is not supported. */ const char * const *v4l2_ctrl_get_menu(u32 id) { static const char * const mpeg_audio_sampling_freq[] = { "44.1 kHz", "48 kHz", "32 kHz", NULL }; static const char * const mpeg_audio_encoding[] = { "MPEG-1/2 Layer I", "MPEG-1/2 Layer II", "MPEG-1/2 Layer III", "MPEG-2/4 AAC", "AC-3", NULL }; static const char * const mpeg_audio_l1_bitrate[] = { "32 kbps", "64 kbps", "96 kbps", "128 kbps", "160 kbps", "192 kbps", "224 kbps", "256 kbps", "288 kbps", "320 kbps", "352 kbps", "384 kbps", "416 kbps", "448 kbps", NULL }; static const char * const mpeg_audio_l2_bitrate[] = { "32 kbps", "48 kbps", "56 kbps", "64 kbps", "80 kbps", "96 kbps", "112 kbps", "128 kbps", "160 kbps", "192 kbps", "224 kbps", "256 kbps", "320 kbps", "384 kbps", NULL }; static const char * const mpeg_audio_l3_bitrate[] = { "32 kbps", "40 kbps", "48 kbps", "56 kbps", "64 kbps", "80 kbps", "96 kbps", "112 kbps", "128 kbps", "160 kbps", "192 kbps", "224 kbps", "256 kbps", "320 kbps", NULL }; static const char * const mpeg_audio_ac3_bitrate[] = { "32 kbps", "40 kbps", "48 kbps", "56 kbps", "64 kbps", "80 kbps", "96 kbps", "112 kbps", "128 kbps", "160 kbps", "192 kbps", "224 kbps", "256 kbps", "320 kbps", "384 kbps", "448 kbps", "512 kbps", "576 kbps", "640 kbps", NULL }; static const char * const mpeg_audio_mode[] = { "Stereo", "Joint Stereo", "Dual", "Mono", NULL }; static const char * const mpeg_audio_mode_extension[] = { "Bound 4", "Bound 8", "Bound 12", "Bound 16", NULL }; static const char * const mpeg_audio_emphasis[] = { "No Emphasis", "50/15 us", "CCITT J17", NULL }; static const char * const mpeg_audio_crc[] = { "No CRC", "16-bit CRC", NULL }; static const char * const mpeg_audio_dec_playback[] = { "Auto", "Stereo", "Left", "Right", "Mono", "Swapped Stereo", NULL }; static const char * const mpeg_video_encoding[] = { "MPEG-1", "MPEG-2", "MPEG-4 AVC", NULL }; static const char * const mpeg_video_aspect[] = { "1x1", "4x3", "16x9", "2.21x1", NULL }; static const char * const mpeg_video_bitrate_mode[] = { "Variable Bitrate", "Constant Bitrate", "Constant Quality", NULL }; static const char * const mpeg_stream_type[] = { "MPEG-2 Program Stream", "MPEG-2 Transport Stream", "MPEG-1 System Stream", "MPEG-2 DVD-compatible Stream", "MPEG-1 VCD-compatible Stream", "MPEG-2 SVCD-compatible Stream", NULL }; static const char * const mpeg_stream_vbi_fmt[] = { "No VBI", "Private Packet, IVTV Format", NULL }; static const char * const camera_power_line_frequency[] = { "Disabled", "50 Hz", "60 Hz", "Auto", NULL }; static const char * const camera_exposure_auto[] = { "Auto Mode", "Manual Mode", "Shutter Priority Mode", "Aperture Priority Mode", NULL }; static const char * const camera_exposure_metering[] = { "Average", "Center Weighted", "Spot", "Matrix", NULL }; static const char * const camera_auto_focus_range[] = { "Auto", "Normal", "Macro", "Infinity", NULL }; static const char * const colorfx[] = { "None", "Black & White", "Sepia", "Negative", "Emboss", "Sketch", "Sky Blue", "Grass Green", "Skin Whiten", "Vivid", "Aqua", "Art Freeze", "Silhouette", "Solarization", "Antique", "Set Cb/Cr", NULL }; static const char * const auto_n_preset_white_balance[] = { "Manual", "Auto", "Incandescent", "Fluorescent", "Fluorescent H", "Horizon", "Daylight", "Flash", "Cloudy", "Shade", NULL, }; static const char * const camera_iso_sensitivity_auto[] = { "Manual", "Auto", NULL }; static const char * const scene_mode[] = { "None", "Backlight", "Beach/Snow", "Candle Light", "Dusk/Dawn", "Fall Colors", "Fireworks", "Landscape", "Night", "Party/Indoor", "Portrait", "Sports", "Sunset", "Text", NULL }; static const char * const tune_emphasis[] = { "None", "50 Microseconds", "75 Microseconds", NULL, }; static const char * const header_mode[] = { "Separate Buffer", "Joined With 1st Frame", NULL, }; static const char * const multi_slice[] = { "Single", "Max Macroblocks", "Max Bytes", NULL, }; static const char * const entropy_mode[] = { "CAVLC", "CABAC", NULL, }; static const char * const mpeg_h264_level[] = { "1", "1b", "1.1", "1.2", "1.3", "2", "2.1", "2.2", "3", "3.1", "3.2", "4", "4.1", "4.2", "5", "5.1", "5.2", "6.0", "6.1", "6.2", NULL, }; static const char * const h264_loop_filter[] = { "Enabled", "Disabled", "Disabled at Slice Boundary", NULL, }; static const char * const h264_profile[] = { "Baseline", "Constrained Baseline", "Main", "Extended", "High", "High 10", "High 422", "High 444 Predictive", "High 10 Intra", "High 422 Intra", "High 444 Intra", "CAVLC 444 Intra", "Scalable Baseline", "Scalable High", "Scalable High Intra", "Stereo High", "Multiview High", "Constrained High", NULL, }; static const char * const vui_sar_idc[] = { "Unspecified", "1:1", "12:11", "10:11", "16:11", "40:33", "24:11", "20:11", "32:11", "80:33", "18:11", "15:11", "64:33", "160:99", "4:3", "3:2", "2:1", "Extended SAR", NULL, }; static const char * const h264_fp_arrangement_type[] = { "Checkerboard", "Column", "Row", "Side by Side", "Top Bottom", "Temporal", NULL, }; static const char * const h264_fmo_map_type[] = { "Interleaved Slices", "Scattered Slices", "Foreground with Leftover", "Box Out", "Raster Scan", "Wipe Scan", "Explicit", NULL, }; static const char * const h264_decode_mode[] = { "Slice-Based", "Frame-Based", NULL, }; static const char * const h264_start_code[] = { "No Start Code", "Annex B Start Code", NULL, }; static const char * const h264_hierarchical_coding_type[] = { "Hier Coding B", "Hier Coding P", NULL, }; static const char * const mpeg_mpeg2_level[] = { "Low", "Main", "High 1440", "High", NULL, }; static const char * const mpeg2_profile[] = { "Simple", "Main", "SNR Scalable", "Spatially Scalable", "High", NULL, }; static const char * const mpeg_mpeg4_level[] = { "0", "0b", "1", "2", "3", "3b", "4", "5", NULL, }; static const char * const mpeg4_profile[] = { "Simple", "Advanced Simple", "Core", "Simple Scalable", "Advanced Coding Efficiency", NULL, }; static const char * const vpx_golden_frame_sel[] = { "Use Previous Frame", "Use Previous Specific Frame", NULL, }; static const char * const vp8_profile[] = { "0", "1", "2", "3", NULL, }; static const char * const vp9_profile[] = { "0", "1", "2", "3", NULL, }; static const char * const vp9_level[] = { "1", "1.1", "2", "2.1", "3", "3.1", "4", "4.1", "5", "5.1", "5.2", "6", "6.1", "6.2", NULL, }; static const char * const flash_led_mode[] = { "Off", "Flash", "Torch", NULL, }; static const char * const flash_strobe_source[] = { "Software", "External", NULL, }; static const char * const jpeg_chroma_subsampling[] = { "4:4:4", "4:2:2", "4:2:0", "4:1:1", "4:1:0", "Gray", NULL, }; static const char * const dv_tx_mode[] = { "DVI-D", "HDMI", NULL, }; static const char * const dv_rgb_range[] = { "Automatic", "RGB Limited Range (16-235)", "RGB Full Range (0-255)", NULL, }; static const char * const dv_it_content_type[] = { "Graphics", "Photo", "Cinema", "Game", "No IT Content", NULL, }; static const char * const detect_md_mode[] = { "Disabled", "Global", "Threshold Grid", "Region Grid", NULL, }; static const char * const av1_profile[] = { "Main", "High", "Professional", NULL, }; static const char * const av1_level[] = { "2.0", "2.1", "2.2", "2.3", "3.0", "3.1", "3.2", "3.3", "4.0", "4.1", "4.2", "4.3", "5.0", "5.1", "5.2", "5.3", "6.0", "6.1", "6.2", "6.3", "7.0", "7.1", "7.2", "7.3", NULL, }; static const char * const hevc_profile[] = { "Main", "Main Still Picture", "Main 10", NULL, }; static const char * const hevc_level[] = { "1", "2", "2.1", "3", "3.1", "4", "4.1", "5", "5.1", "5.2", "6", "6.1", "6.2", NULL, }; static const char * const hevc_hierarchial_coding_type[] = { "B", "P", NULL, }; static const char * const hevc_refresh_type[] = { "None", "CRA", "IDR", NULL, }; static const char * const hevc_size_of_length_field[] = { "0", "1", "2", "4", NULL, }; static const char * const hevc_tier[] = { "Main", "High", NULL, }; static const char * const hevc_loop_filter_mode[] = { "Disabled", "Enabled", "Disabled at slice boundary", "NULL", }; static const char * const hevc_decode_mode[] = { "Slice-Based", "Frame-Based", NULL, }; static const char * const hevc_start_code[] = { "No Start Code", "Annex B Start Code", NULL, }; static const char * const camera_orientation[] = { "Front", "Back", "External", NULL, }; static const char * const mpeg_video_frame_skip[] = { "Disabled", "Level Limit", "VBV/CPB Limit", NULL, }; static const char * const intra_refresh_period_type[] = { "Random", "Cyclic", NULL, }; switch (id) { case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ: return mpeg_audio_sampling_freq; case V4L2_CID_MPEG_AUDIO_ENCODING: return mpeg_audio_encoding; case V4L2_CID_MPEG_AUDIO_L1_BITRATE: return mpeg_audio_l1_bitrate; case V4L2_CID_MPEG_AUDIO_L2_BITRATE: return mpeg_audio_l2_bitrate; case V4L2_CID_MPEG_AUDIO_L3_BITRATE: return mpeg_audio_l3_bitrate; case V4L2_CID_MPEG_AUDIO_AC3_BITRATE: return mpeg_audio_ac3_bitrate; case V4L2_CID_MPEG_AUDIO_MODE: return mpeg_audio_mode; case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION: return mpeg_audio_mode_extension; case V4L2_CID_MPEG_AUDIO_EMPHASIS: return mpeg_audio_emphasis; case V4L2_CID_MPEG_AUDIO_CRC: return mpeg_audio_crc; case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK: case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK: return mpeg_audio_dec_playback; case V4L2_CID_MPEG_VIDEO_ENCODING: return mpeg_video_encoding; case V4L2_CID_MPEG_VIDEO_ASPECT: return mpeg_video_aspect; case V4L2_CID_MPEG_VIDEO_BITRATE_MODE: return mpeg_video_bitrate_mode; case V4L2_CID_MPEG_STREAM_TYPE: return mpeg_stream_type; case V4L2_CID_MPEG_STREAM_VBI_FMT: return mpeg_stream_vbi_fmt; case V4L2_CID_POWER_LINE_FREQUENCY: return camera_power_line_frequency; case V4L2_CID_EXPOSURE_AUTO: return camera_exposure_auto; case V4L2_CID_EXPOSURE_METERING: return camera_exposure_metering; case V4L2_CID_AUTO_FOCUS_RANGE: return camera_auto_focus_range; case V4L2_CID_COLORFX: return colorfx; case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE: return auto_n_preset_white_balance; case V4L2_CID_ISO_SENSITIVITY_AUTO: return camera_iso_sensitivity_auto; case V4L2_CID_SCENE_MODE: return scene_mode; case V4L2_CID_TUNE_PREEMPHASIS: return tune_emphasis; case V4L2_CID_TUNE_DEEMPHASIS: return tune_emphasis; case V4L2_CID_FLASH_LED_MODE: return flash_led_mode; case V4L2_CID_FLASH_STROBE_SOURCE: return flash_strobe_source; case V4L2_CID_MPEG_VIDEO_HEADER_MODE: return header_mode; case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE: return mpeg_video_frame_skip; case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE: return multi_slice; case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE: return entropy_mode; case V4L2_CID_MPEG_VIDEO_H264_LEVEL: return mpeg_h264_level; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE: return h264_loop_filter; case V4L2_CID_MPEG_VIDEO_H264_PROFILE: return h264_profile; case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: return vui_sar_idc; case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: return h264_fp_arrangement_type; case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: return h264_fmo_map_type; case V4L2_CID_STATELESS_H264_DECODE_MODE: return h264_decode_mode; case V4L2_CID_STATELESS_H264_START_CODE: return h264_start_code; case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE: return h264_hierarchical_coding_type; case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return mpeg_mpeg2_level; case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: return mpeg2_profile; case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: return mpeg_mpeg4_level; case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE: return mpeg4_profile; case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL: return vpx_golden_frame_sel; case V4L2_CID_MPEG_VIDEO_VP8_PROFILE: return vp8_profile; case V4L2_CID_MPEG_VIDEO_VP9_PROFILE: return vp9_profile; case V4L2_CID_MPEG_VIDEO_VP9_LEVEL: return vp9_level; case V4L2_CID_JPEG_CHROMA_SUBSAMPLING: return jpeg_chroma_subsampling; case V4L2_CID_DV_TX_MODE: return dv_tx_mode; case V4L2_CID_DV_TX_RGB_RANGE: case V4L2_CID_DV_RX_RGB_RANGE: return dv_rgb_range; case V4L2_CID_DV_TX_IT_CONTENT_TYPE: case V4L2_CID_DV_RX_IT_CONTENT_TYPE: return dv_it_content_type; case V4L2_CID_DETECT_MD_MODE: return detect_md_mode; case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE: return hevc_profile; case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL: return hevc_level; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE: return hevc_hierarchial_coding_type; case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE: return hevc_refresh_type; case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD: return hevc_size_of_length_field; case V4L2_CID_MPEG_VIDEO_HEVC_TIER: return hevc_tier; case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE: return hevc_loop_filter_mode; case V4L2_CID_MPEG_VIDEO_AV1_PROFILE: return av1_profile; case V4L2_CID_MPEG_VIDEO_AV1_LEVEL: return av1_level; case V4L2_CID_STATELESS_HEVC_DECODE_MODE: return hevc_decode_mode; case V4L2_CID_STATELESS_HEVC_START_CODE: return hevc_start_code; case V4L2_CID_CAMERA_ORIENTATION: return camera_orientation; case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE: return intra_refresh_period_type; default: return NULL; } } EXPORT_SYMBOL(v4l2_ctrl_get_menu); #define __v4l2_qmenu_int_len(arr, len) ({ *(len) = ARRAY_SIZE(arr); (arr); }) /* * Returns NULL or an s64 type array containing the menu for given * control ID. The total number of the menu items is returned in @len. */ const s64 *v4l2_ctrl_get_int_menu(u32 id, u32 *len) { static const s64 qmenu_int_vpx_num_partitions[] = { 1, 2, 4, 8, }; static const s64 qmenu_int_vpx_num_ref_frames[] = { 1, 2, 3, }; switch (id) { case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS: return __v4l2_qmenu_int_len(qmenu_int_vpx_num_partitions, len); case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES: return __v4l2_qmenu_int_len(qmenu_int_vpx_num_ref_frames, len); default: *len = 0; return NULL; } } EXPORT_SYMBOL(v4l2_ctrl_get_int_menu); /* Return the control name. */ const char *v4l2_ctrl_get_name(u32 id) { switch (id) { /* USER controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_USER_CLASS: return "User Controls"; case V4L2_CID_BRIGHTNESS: return "Brightness"; case V4L2_CID_CONTRAST: return "Contrast"; case V4L2_CID_SATURATION: return "Saturation"; case V4L2_CID_HUE: return "Hue"; case V4L2_CID_AUDIO_VOLUME: return "Volume"; case V4L2_CID_AUDIO_BALANCE: return "Balance"; case V4L2_CID_AUDIO_BASS: return "Bass"; case V4L2_CID_AUDIO_TREBLE: return "Treble"; case V4L2_CID_AUDIO_MUTE: return "Mute"; case V4L2_CID_AUDIO_LOUDNESS: return "Loudness"; case V4L2_CID_BLACK_LEVEL: return "Black Level"; case V4L2_CID_AUTO_WHITE_BALANCE: return "White Balance, Automatic"; case V4L2_CID_DO_WHITE_BALANCE: return "Do White Balance"; case V4L2_CID_RED_BALANCE: return "Red Balance"; case V4L2_CID_BLUE_BALANCE: return "Blue Balance"; case V4L2_CID_GAMMA: return "Gamma"; case V4L2_CID_EXPOSURE: return "Exposure"; case V4L2_CID_AUTOGAIN: return "Gain, Automatic"; case V4L2_CID_GAIN: return "Gain"; case V4L2_CID_HFLIP: return "Horizontal Flip"; case V4L2_CID_VFLIP: return "Vertical Flip"; case V4L2_CID_POWER_LINE_FREQUENCY: return "Power Line Frequency"; case V4L2_CID_HUE_AUTO: return "Hue, Automatic"; case V4L2_CID_WHITE_BALANCE_TEMPERATURE: return "White Balance Temperature"; case V4L2_CID_SHARPNESS: return "Sharpness"; case V4L2_CID_BACKLIGHT_COMPENSATION: return "Backlight Compensation"; case V4L2_CID_CHROMA_AGC: return "Chroma AGC"; case V4L2_CID_COLOR_KILLER: return "Color Killer"; case V4L2_CID_COLORFX: return "Color Effects"; case V4L2_CID_AUTOBRIGHTNESS: return "Brightness, Automatic"; case V4L2_CID_BAND_STOP_FILTER: return "Band-Stop Filter"; case V4L2_CID_ROTATE: return "Rotate"; case V4L2_CID_BG_COLOR: return "Background Color"; case V4L2_CID_CHROMA_GAIN: return "Chroma Gain"; case V4L2_CID_ILLUMINATORS_1: return "Illuminator 1"; case V4L2_CID_ILLUMINATORS_2: return "Illuminator 2"; case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE: return "Min Number of Capture Buffers"; case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT: return "Min Number of Output Buffers"; case V4L2_CID_ALPHA_COMPONENT: return "Alpha Component"; case V4L2_CID_COLORFX_CBCR: return "Color Effects, CbCr"; case V4L2_CID_COLORFX_RGB: return "Color Effects, RGB"; /* * Codec controls * * The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical. * * Keep the order of the 'case's the same as in videodev2.h! */ case V4L2_CID_CODEC_CLASS: return "Codec Controls"; case V4L2_CID_MPEG_STREAM_TYPE: return "Stream Type"; case V4L2_CID_MPEG_STREAM_PID_PMT: return "Stream PMT Program ID"; case V4L2_CID_MPEG_STREAM_PID_AUDIO: return "Stream Audio Program ID"; case V4L2_CID_MPEG_STREAM_PID_VIDEO: return "Stream Video Program ID"; case V4L2_CID_MPEG_STREAM_PID_PCR: return "Stream PCR Program ID"; case V4L2_CID_MPEG_STREAM_PES_ID_AUDIO: return "Stream PES Audio ID"; case V4L2_CID_MPEG_STREAM_PES_ID_VIDEO: return "Stream PES Video ID"; case V4L2_CID_MPEG_STREAM_VBI_FMT: return "Stream VBI Format"; case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ: return "Audio Sampling Frequency"; case V4L2_CID_MPEG_AUDIO_ENCODING: return "Audio Encoding"; case V4L2_CID_MPEG_AUDIO_L1_BITRATE: return "Audio Layer I Bitrate"; case V4L2_CID_MPEG_AUDIO_L2_BITRATE: return "Audio Layer II Bitrate"; case V4L2_CID_MPEG_AUDIO_L3_BITRATE: return "Audio Layer III Bitrate"; case V4L2_CID_MPEG_AUDIO_MODE: return "Audio Stereo Mode"; case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION: return "Audio Stereo Mode Extension"; case V4L2_CID_MPEG_AUDIO_EMPHASIS: return "Audio Emphasis"; case V4L2_CID_MPEG_AUDIO_CRC: return "Audio CRC"; case V4L2_CID_MPEG_AUDIO_MUTE: return "Audio Mute"; case V4L2_CID_MPEG_AUDIO_AAC_BITRATE: return "Audio AAC Bitrate"; case V4L2_CID_MPEG_AUDIO_AC3_BITRATE: return "Audio AC-3 Bitrate"; case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK: return "Audio Playback"; case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK: return "Audio Multilingual Playback"; case V4L2_CID_MPEG_VIDEO_ENCODING: return "Video Encoding"; case V4L2_CID_MPEG_VIDEO_ASPECT: return "Video Aspect"; case V4L2_CID_MPEG_VIDEO_B_FRAMES: return "Video B Frames"; case V4L2_CID_MPEG_VIDEO_GOP_SIZE: return "Video GOP Size"; case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE: return "Video GOP Closure"; case V4L2_CID_MPEG_VIDEO_PULLDOWN: return "Video Pulldown"; case V4L2_CID_MPEG_VIDEO_BITRATE_MODE: return "Video Bitrate Mode"; case V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY: return "Constant Quality"; case V4L2_CID_MPEG_VIDEO_BITRATE: return "Video Bitrate"; case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK: return "Video Peak Bitrate"; case V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION: return "Video Temporal Decimation"; case V4L2_CID_MPEG_VIDEO_MUTE: return "Video Mute"; case V4L2_CID_MPEG_VIDEO_MUTE_YUV: return "Video Mute YUV"; case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE: return "Decoder Slice Interface"; case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER: return "MPEG4 Loop Filter Enable"; case V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB: return "Number of Intra Refresh MBs"; case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE: return "Intra Refresh Period Type"; case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD: return "Intra Refresh Period"; case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE: return "Frame Level Rate Control Enable"; case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE: return "H264 MB Level Rate Control"; case V4L2_CID_MPEG_VIDEO_HEADER_MODE: return "Sequence Header Mode"; case V4L2_CID_MPEG_VIDEO_MAX_REF_PIC: return "Max Number of Reference Pics"; case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE: return "Frame Skip Mode"; case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY: return "Display Delay"; case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE: return "Display Delay Enable"; case V4L2_CID_MPEG_VIDEO_AU_DELIMITER: return "Generate Access Unit Delimiters"; case V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP: return "H263 I-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP: return "H263 P-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP: return "H263 B-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H263_MIN_QP: return "H263 Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_H263_MAX_QP: return "H263 Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP: return "H264 I-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP: return "H264 P-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP: return "H264 B-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_H264_MAX_QP: return "H264 Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_MIN_QP: return "H264 Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM: return "H264 8x8 Transform Enable"; case V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE: return "H264 CPB Buffer Size"; case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE: return "H264 Entropy Mode"; case V4L2_CID_MPEG_VIDEO_H264_I_PERIOD: return "H264 I-Frame Period"; case V4L2_CID_MPEG_VIDEO_H264_LEVEL: return "H264 Level"; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA: return "H264 Loop Filter Alpha Offset"; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA: return "H264 Loop Filter Beta Offset"; case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE: return "H264 Loop Filter Mode"; case V4L2_CID_MPEG_VIDEO_H264_PROFILE: return "H264 Profile"; case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT: return "Vertical Size of SAR"; case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH: return "Horizontal Size of SAR"; case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE: return "Aspect Ratio VUI Enable"; case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: return "VUI Aspect Ratio IDC"; case V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING: return "H264 Enable Frame Packing SEI"; case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0: return "H264 Set Curr. Frame as Frame0"; case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: return "H264 FP Arrangement Type"; case V4L2_CID_MPEG_VIDEO_H264_FMO: return "H264 Flexible MB Ordering"; case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: return "H264 Map Type for FMO"; case V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP: return "H264 FMO Number of Slice Groups"; case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION: return "H264 FMO Direction of Change"; case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE: return "H264 FMO Size of 1st Slice Grp"; case V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH: return "H264 FMO No. of Consecutive MBs"; case V4L2_CID_MPEG_VIDEO_H264_ASO: return "H264 Arbitrary Slice Ordering"; case V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER: return "H264 ASO Slice Order"; case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING: return "Enable H264 Hierarchical Coding"; case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE: return "H264 Hierarchical Coding Type"; case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER:return "H264 Number of HC Layers"; case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP: return "H264 Set QP Value for HC Layers"; case V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION: return "H264 Constrained Intra Pred"; case V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET: return "H264 Chroma QP Index Offset"; case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP: return "H264 I-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP: return "H264 I-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP: return "H264 P-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP: return "H264 P-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MIN_QP: return "H264 B-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MAX_QP: return "H264 B-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L0_BR: return "H264 Hierarchical Lay 0 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L1_BR: return "H264 Hierarchical Lay 1 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L2_BR: return "H264 Hierarchical Lay 2 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L3_BR: return "H264 Hierarchical Lay 3 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L4_BR: return "H264 Hierarchical Lay 4 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L5_BR: return "H264 Hierarchical Lay 5 Bitrate"; case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L6_BR: return "H264 Hierarchical Lay 6 Bitrate"; case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return "MPEG2 Level"; case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: return "MPEG2 Profile"; case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP: return "MPEG4 I-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP: return "MPEG4 P-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP: return "MPEG4 B-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP: return "MPEG4 Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP: return "MPEG4 Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: return "MPEG4 Level"; case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE: return "MPEG4 Profile"; case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL: return "Quarter Pixel Search Enable"; case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES: return "Maximum Bytes in a Slice"; case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB: return "Number of MBs in a Slice"; case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE: return "Slice Partitioning Method"; case V4L2_CID_MPEG_VIDEO_VBV_SIZE: return "VBV Buffer Size"; case V4L2_CID_MPEG_VIDEO_DEC_PTS: return "Video Decoder PTS"; case V4L2_CID_MPEG_VIDEO_DEC_FRAME: return "Video Decoder Frame Count"; case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR: return "Video Decoder Conceal Color"; case V4L2_CID_MPEG_VIDEO_VBV_DELAY: return "Initial Delay for VBV Control"; case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE: return "Horizontal MV Search Range"; case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE: return "Vertical MV Search Range"; case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER: return "Repeat Sequence Header"; case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME: return "Force Key Frame"; case V4L2_CID_MPEG_VIDEO_BASELAYER_PRIORITY_ID: return "Base Layer Priority ID"; case V4L2_CID_MPEG_VIDEO_LTR_COUNT: return "LTR Count"; case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX: return "Frame LTR Index"; case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES: return "Use LTR Frames"; case V4L2_CID_MPEG_VIDEO_AVERAGE_QP: return "Average QP Value"; case V4L2_CID_FWHT_I_FRAME_QP: return "FWHT I-Frame QP Value"; case V4L2_CID_FWHT_P_FRAME_QP: return "FWHT P-Frame QP Value"; /* VPX controls */ case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS: return "VPX Number of Partitions"; case V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4: return "VPX Intra Mode Decision Disable"; case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES: return "VPX No. of Refs for P Frame"; case V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL: return "VPX Loop Filter Level Range"; case V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS: return "VPX Deblocking Effect Control"; case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD: return "VPX Golden Frame Refresh Period"; case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL: return "VPX Golden Frame Indicator"; case V4L2_CID_MPEG_VIDEO_VPX_MIN_QP: return "VPX Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_VPX_MAX_QP: return "VPX Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP: return "VPX I-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP: return "VPX P-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_VP8_PROFILE: return "VP8 Profile"; case V4L2_CID_MPEG_VIDEO_VP9_PROFILE: return "VP9 Profile"; case V4L2_CID_MPEG_VIDEO_VP9_LEVEL: return "VP9 Level"; /* HEVC controls */ case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP: return "HEVC I-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP: return "HEVC P-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP: return "HEVC B-Frame QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP: return "HEVC Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP: return "HEVC Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MIN_QP: return "HEVC I-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MAX_QP: return "HEVC I-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MIN_QP: return "HEVC P-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MAX_QP: return "HEVC P-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MIN_QP: return "HEVC B-Frame Minimum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MAX_QP: return "HEVC B-Frame Maximum QP Value"; case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE: return "HEVC Profile"; case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL: return "HEVC Level"; case V4L2_CID_MPEG_VIDEO_HEVC_TIER: return "HEVC Tier"; case V4L2_CID_MPEG_VIDEO_HEVC_FRAME_RATE_RESOLUTION: return "HEVC Frame Rate Resolution"; case V4L2_CID_MPEG_VIDEO_HEVC_MAX_PARTITION_DEPTH: return "HEVC Maximum Coding Unit Depth"; case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE: return "HEVC Refresh Type"; case V4L2_CID_MPEG_VIDEO_HEVC_CONST_INTRA_PRED: return "HEVC Constant Intra Prediction"; case V4L2_CID_MPEG_VIDEO_HEVC_LOSSLESS_CU: return "HEVC Lossless Encoding"; case V4L2_CID_MPEG_VIDEO_HEVC_WAVEFRONT: return "HEVC Wavefront"; case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE: return "HEVC Loop Filter"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP: return "HEVC QP Values"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE: return "HEVC Hierarchical Coding Type"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER: return "HEVC Hierarchical Coding Layer"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_QP: return "HEVC Hierarchical Layer 0 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_QP: return "HEVC Hierarchical Layer 1 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_QP: return "HEVC Hierarchical Layer 2 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_QP: return "HEVC Hierarchical Layer 3 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_QP: return "HEVC Hierarchical Layer 4 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_QP: return "HEVC Hierarchical Layer 5 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_QP: return "HEVC Hierarchical Layer 6 QP"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_BR: return "HEVC Hierarchical Lay 0 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_BR: return "HEVC Hierarchical Lay 1 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_BR: return "HEVC Hierarchical Lay 2 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_BR: return "HEVC Hierarchical Lay 3 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_BR: return "HEVC Hierarchical Lay 4 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_BR: return "HEVC Hierarchical Lay 5 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR: return "HEVC Hierarchical Lay 6 BitRate"; case V4L2_CID_MPEG_VIDEO_HEVC_GENERAL_PB: return "HEVC General PB"; case V4L2_CID_MPEG_VIDEO_HEVC_TEMPORAL_ID: return "HEVC Temporal ID"; case V4L2_CID_MPEG_VIDEO_HEVC_STRONG_SMOOTHING: return "HEVC Strong Intra Smoothing"; case V4L2_CID_MPEG_VIDEO_HEVC_INTRA_PU_SPLIT: return "HEVC Intra PU Split"; case V4L2_CID_MPEG_VIDEO_HEVC_TMV_PREDICTION: return "HEVC TMV Prediction"; case V4L2_CID_MPEG_VIDEO_HEVC_MAX_NUM_MERGE_MV_MINUS1: return "HEVC Max Num of Candidate MVs"; case V4L2_CID_MPEG_VIDEO_HEVC_WITHOUT_STARTCODE: return "HEVC ENC Without Startcode"; case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_PERIOD: return "HEVC Num of I-Frame b/w 2 IDR"; case V4L2_CID_MPEG_VIDEO_HEVC_LF_BETA_OFFSET_DIV2: return "HEVC Loop Filter Beta Offset"; case V4L2_CID_MPEG_VIDEO_HEVC_LF_TC_OFFSET_DIV2: return "HEVC Loop Filter TC Offset"; case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD: return "HEVC Size of Length Field"; case V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES: return "Reference Frames for a P-Frame"; case V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR: return "Prepend SPS and PPS to IDR"; /* AV1 controls */ case V4L2_CID_MPEG_VIDEO_AV1_PROFILE: return "AV1 Profile"; case V4L2_CID_MPEG_VIDEO_AV1_LEVEL: return "AV1 Level"; /* CAMERA controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_CAMERA_CLASS: return "Camera Controls"; case V4L2_CID_EXPOSURE_AUTO: return "Auto Exposure"; case V4L2_CID_EXPOSURE_ABSOLUTE: return "Exposure Time, Absolute"; case V4L2_CID_EXPOSURE_AUTO_PRIORITY: return "Exposure, Dynamic Framerate"; case V4L2_CID_PAN_RELATIVE: return "Pan, Relative"; case V4L2_CID_TILT_RELATIVE: return "Tilt, Relative"; case V4L2_CID_PAN_RESET: return "Pan, Reset"; case V4L2_CID_TILT_RESET: return "Tilt, Reset"; case V4L2_CID_PAN_ABSOLUTE: return "Pan, Absolute"; case V4L2_CID_TILT_ABSOLUTE: return "Tilt, Absolute"; case V4L2_CID_FOCUS_ABSOLUTE: return "Focus, Absolute"; case V4L2_CID_FOCUS_RELATIVE: return "Focus, Relative"; case V4L2_CID_FOCUS_AUTO: return "Focus, Automatic Continuous"; case V4L2_CID_ZOOM_ABSOLUTE: return "Zoom, Absolute"; case V4L2_CID_ZOOM_RELATIVE: return "Zoom, Relative"; case V4L2_CID_ZOOM_CONTINUOUS: return "Zoom, Continuous"; case V4L2_CID_PRIVACY: return "Privacy"; case V4L2_CID_IRIS_ABSOLUTE: return "Iris, Absolute"; case V4L2_CID_IRIS_RELATIVE: return "Iris, Relative"; case V4L2_CID_AUTO_EXPOSURE_BIAS: return "Auto Exposure, Bias"; case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE: return "White Balance, Auto & Preset"; case V4L2_CID_WIDE_DYNAMIC_RANGE: return "Wide Dynamic Range"; case V4L2_CID_IMAGE_STABILIZATION: return "Image Stabilization"; case V4L2_CID_ISO_SENSITIVITY: return "ISO Sensitivity"; case V4L2_CID_ISO_SENSITIVITY_AUTO: return "ISO Sensitivity, Auto"; case V4L2_CID_EXPOSURE_METERING: return "Exposure, Metering Mode"; case V4L2_CID_SCENE_MODE: return "Scene Mode"; case V4L2_CID_3A_LOCK: return "3A Lock"; case V4L2_CID_AUTO_FOCUS_START: return "Auto Focus, Start"; case V4L2_CID_AUTO_FOCUS_STOP: return "Auto Focus, Stop"; case V4L2_CID_AUTO_FOCUS_STATUS: return "Auto Focus, Status"; case V4L2_CID_AUTO_FOCUS_RANGE: return "Auto Focus, Range"; case V4L2_CID_PAN_SPEED: return "Pan, Speed"; case V4L2_CID_TILT_SPEED: return "Tilt, Speed"; case V4L2_CID_UNIT_CELL_SIZE: return "Unit Cell Size"; case V4L2_CID_CAMERA_ORIENTATION: return "Camera Orientation"; case V4L2_CID_CAMERA_SENSOR_ROTATION: return "Camera Sensor Rotation"; case V4L2_CID_HDR_SENSOR_MODE: return "HDR Sensor Mode"; /* FM Radio Modulator controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_FM_TX_CLASS: return "FM Radio Modulator Controls"; case V4L2_CID_RDS_TX_DEVIATION: return "RDS Signal Deviation"; case V4L2_CID_RDS_TX_PI: return "RDS Program ID"; case V4L2_CID_RDS_TX_PTY: return "RDS Program Type"; case V4L2_CID_RDS_TX_PS_NAME: return "RDS PS Name"; case V4L2_CID_RDS_TX_RADIO_TEXT: return "RDS Radio Text"; case V4L2_CID_RDS_TX_MONO_STEREO: return "RDS Stereo"; case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD: return "RDS Artificial Head"; case V4L2_CID_RDS_TX_COMPRESSED: return "RDS Compressed"; case V4L2_CID_RDS_TX_DYNAMIC_PTY: return "RDS Dynamic PTY"; case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement"; case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: return "RDS Traffic Program"; case V4L2_CID_RDS_TX_MUSIC_SPEECH: return "RDS Music"; case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE: return "RDS Enable Alt Frequencies"; case V4L2_CID_RDS_TX_ALT_FREQS: return "RDS Alternate Frequencies"; case V4L2_CID_AUDIO_LIMITER_ENABLED: return "Audio Limiter Feature Enabled"; case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: return "Audio Limiter Release Time"; case V4L2_CID_AUDIO_LIMITER_DEVIATION: return "Audio Limiter Deviation"; case V4L2_CID_AUDIO_COMPRESSION_ENABLED: return "Audio Compression Enabled"; case V4L2_CID_AUDIO_COMPRESSION_GAIN: return "Audio Compression Gain"; case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD: return "Audio Compression Threshold"; case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME: return "Audio Compression Attack Time"; case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME: return "Audio Compression Release Time"; case V4L2_CID_PILOT_TONE_ENABLED: return "Pilot Tone Feature Enabled"; case V4L2_CID_PILOT_TONE_DEVIATION: return "Pilot Tone Deviation"; case V4L2_CID_PILOT_TONE_FREQUENCY: return "Pilot Tone Frequency"; case V4L2_CID_TUNE_PREEMPHASIS: return "Pre-Emphasis"; case V4L2_CID_TUNE_POWER_LEVEL: return "Tune Power Level"; case V4L2_CID_TUNE_ANTENNA_CAPACITOR: return "Tune Antenna Capacitor"; /* Flash controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_FLASH_CLASS: return "Flash Controls"; case V4L2_CID_FLASH_LED_MODE: return "LED Mode"; case V4L2_CID_FLASH_STROBE_SOURCE: return "Strobe Source"; case V4L2_CID_FLASH_STROBE: return "Strobe"; case V4L2_CID_FLASH_STROBE_STOP: return "Stop Strobe"; case V4L2_CID_FLASH_STROBE_STATUS: return "Strobe Status"; case V4L2_CID_FLASH_TIMEOUT: return "Strobe Timeout"; case V4L2_CID_FLASH_INTENSITY: return "Intensity, Flash Mode"; case V4L2_CID_FLASH_TORCH_INTENSITY: return "Intensity, Torch Mode"; case V4L2_CID_FLASH_INDICATOR_INTENSITY: return "Intensity, Indicator"; case V4L2_CID_FLASH_FAULT: return "Faults"; case V4L2_CID_FLASH_CHARGE: return "Charge"; case V4L2_CID_FLASH_READY: return "Ready to Strobe"; /* JPEG encoder controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_JPEG_CLASS: return "JPEG Compression Controls"; case V4L2_CID_JPEG_CHROMA_SUBSAMPLING: return "Chroma Subsampling"; case V4L2_CID_JPEG_RESTART_INTERVAL: return "Restart Interval"; case V4L2_CID_JPEG_COMPRESSION_QUALITY: return "Compression Quality"; case V4L2_CID_JPEG_ACTIVE_MARKER: return "Active Markers"; /* Image source controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_IMAGE_SOURCE_CLASS: return "Image Source Controls"; case V4L2_CID_VBLANK: return "Vertical Blanking"; case V4L2_CID_HBLANK: return "Horizontal Blanking"; case V4L2_CID_ANALOGUE_GAIN: return "Analogue Gain"; case V4L2_CID_TEST_PATTERN_RED: return "Red Pixel Value"; case V4L2_CID_TEST_PATTERN_GREENR: return "Green (Red) Pixel Value"; case V4L2_CID_TEST_PATTERN_BLUE: return "Blue Pixel Value"; case V4L2_CID_TEST_PATTERN_GREENB: return "Green (Blue) Pixel Value"; case V4L2_CID_NOTIFY_GAINS: return "Notify Gains"; /* Image processing controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_IMAGE_PROC_CLASS: return "Image Processing Controls"; case V4L2_CID_LINK_FREQ: return "Link Frequency"; case V4L2_CID_PIXEL_RATE: return "Pixel Rate"; case V4L2_CID_TEST_PATTERN: return "Test Pattern"; case V4L2_CID_DEINTERLACING_MODE: return "Deinterlacing Mode"; case V4L2_CID_DIGITAL_GAIN: return "Digital Gain"; /* DV controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_DV_CLASS: return "Digital Video Controls"; case V4L2_CID_DV_TX_HOTPLUG: return "Hotplug Present"; case V4L2_CID_DV_TX_RXSENSE: return "RxSense Present"; case V4L2_CID_DV_TX_EDID_PRESENT: return "EDID Present"; case V4L2_CID_DV_TX_MODE: return "Transmit Mode"; case V4L2_CID_DV_TX_RGB_RANGE: return "Tx RGB Quantization Range"; case V4L2_CID_DV_TX_IT_CONTENT_TYPE: return "Tx IT Content Type"; case V4L2_CID_DV_RX_POWER_PRESENT: return "Power Present"; case V4L2_CID_DV_RX_RGB_RANGE: return "Rx RGB Quantization Range"; case V4L2_CID_DV_RX_IT_CONTENT_TYPE: return "Rx IT Content Type"; case V4L2_CID_FM_RX_CLASS: return "FM Radio Receiver Controls"; case V4L2_CID_TUNE_DEEMPHASIS: return "De-Emphasis"; case V4L2_CID_RDS_RECEPTION: return "RDS Reception"; case V4L2_CID_RF_TUNER_CLASS: return "RF Tuner Controls"; case V4L2_CID_RF_TUNER_RF_GAIN: return "RF Gain"; case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO: return "LNA Gain, Auto"; case V4L2_CID_RF_TUNER_LNA_GAIN: return "LNA Gain"; case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO: return "Mixer Gain, Auto"; case V4L2_CID_RF_TUNER_MIXER_GAIN: return "Mixer Gain"; case V4L2_CID_RF_TUNER_IF_GAIN_AUTO: return "IF Gain, Auto"; case V4L2_CID_RF_TUNER_IF_GAIN: return "IF Gain"; case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: return "Bandwidth, Auto"; case V4L2_CID_RF_TUNER_BANDWIDTH: return "Bandwidth"; case V4L2_CID_RF_TUNER_PLL_LOCK: return "PLL Lock"; case V4L2_CID_RDS_RX_PTY: return "RDS Program Type"; case V4L2_CID_RDS_RX_PS_NAME: return "RDS PS Name"; case V4L2_CID_RDS_RX_RADIO_TEXT: return "RDS Radio Text"; case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement"; case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: return "RDS Traffic Program"; case V4L2_CID_RDS_RX_MUSIC_SPEECH: return "RDS Music"; /* Detection controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_DETECT_CLASS: return "Detection Controls"; case V4L2_CID_DETECT_MD_MODE: return "Motion Detection Mode"; case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold"; case V4L2_CID_DETECT_MD_THRESHOLD_GRID: return "MD Threshold Grid"; case V4L2_CID_DETECT_MD_REGION_GRID: return "MD Region Grid"; /* Stateless Codec controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_CODEC_STATELESS_CLASS: return "Stateless Codec Controls"; case V4L2_CID_STATELESS_H264_DECODE_MODE: return "H264 Decode Mode"; case V4L2_CID_STATELESS_H264_START_CODE: return "H264 Start Code"; case V4L2_CID_STATELESS_H264_SPS: return "H264 Sequence Parameter Set"; case V4L2_CID_STATELESS_H264_PPS: return "H264 Picture Parameter Set"; case V4L2_CID_STATELESS_H264_SCALING_MATRIX: return "H264 Scaling Matrix"; case V4L2_CID_STATELESS_H264_PRED_WEIGHTS: return "H264 Prediction Weight Table"; case V4L2_CID_STATELESS_H264_SLICE_PARAMS: return "H264 Slice Parameters"; case V4L2_CID_STATELESS_H264_DECODE_PARAMS: return "H264 Decode Parameters"; case V4L2_CID_STATELESS_FWHT_PARAMS: return "FWHT Stateless Parameters"; case V4L2_CID_STATELESS_VP8_FRAME: return "VP8 Frame Parameters"; case V4L2_CID_STATELESS_MPEG2_SEQUENCE: return "MPEG-2 Sequence Header"; case V4L2_CID_STATELESS_MPEG2_PICTURE: return "MPEG-2 Picture Header"; case V4L2_CID_STATELESS_MPEG2_QUANTISATION: return "MPEG-2 Quantisation Matrices"; case V4L2_CID_STATELESS_VP9_COMPRESSED_HDR: return "VP9 Probabilities Updates"; case V4L2_CID_STATELESS_VP9_FRAME: return "VP9 Frame Decode Parameters"; case V4L2_CID_STATELESS_HEVC_SPS: return "HEVC Sequence Parameter Set"; case V4L2_CID_STATELESS_HEVC_PPS: return "HEVC Picture Parameter Set"; case V4L2_CID_STATELESS_HEVC_SLICE_PARAMS: return "HEVC Slice Parameters"; case V4L2_CID_STATELESS_HEVC_SCALING_MATRIX: return "HEVC Scaling Matrix"; case V4L2_CID_STATELESS_HEVC_DECODE_PARAMS: return "HEVC Decode Parameters"; case V4L2_CID_STATELESS_HEVC_DECODE_MODE: return "HEVC Decode Mode"; case V4L2_CID_STATELESS_HEVC_START_CODE: return "HEVC Start Code"; case V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS: return "HEVC Entry Point Offsets"; case V4L2_CID_STATELESS_AV1_SEQUENCE: return "AV1 Sequence Parameters"; case V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY: return "AV1 Tile Group Entry"; case V4L2_CID_STATELESS_AV1_FRAME: return "AV1 Frame Parameters"; case V4L2_CID_STATELESS_AV1_FILM_GRAIN: return "AV1 Film Grain"; /* Colorimetry controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ case V4L2_CID_COLORIMETRY_CLASS: return "Colorimetry Controls"; case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO: return "HDR10 Content Light Info"; case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY: return "HDR10 Mastering Display"; default: return NULL; } } EXPORT_SYMBOL(v4l2_ctrl_get_name); void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, s64 *min, s64 *max, u64 *step, s64 *def, u32 *flags) { *name = v4l2_ctrl_get_name(id); *flags = 0; switch (id) { case V4L2_CID_AUDIO_MUTE: case V4L2_CID_AUDIO_LOUDNESS: case V4L2_CID_AUTO_WHITE_BALANCE: case V4L2_CID_AUTOGAIN: case V4L2_CID_HFLIP: case V4L2_CID_VFLIP: case V4L2_CID_HUE_AUTO: case V4L2_CID_CHROMA_AGC: case V4L2_CID_COLOR_KILLER: case V4L2_CID_AUTOBRIGHTNESS: case V4L2_CID_MPEG_AUDIO_MUTE: case V4L2_CID_MPEG_VIDEO_MUTE: case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE: case V4L2_CID_MPEG_VIDEO_PULLDOWN: case V4L2_CID_EXPOSURE_AUTO_PRIORITY: case V4L2_CID_FOCUS_AUTO: case V4L2_CID_PRIVACY: case V4L2_CID_AUDIO_LIMITER_ENABLED: case V4L2_CID_AUDIO_COMPRESSION_ENABLED: case V4L2_CID_PILOT_TONE_ENABLED: case V4L2_CID_ILLUMINATORS_1: case V4L2_CID_ILLUMINATORS_2: case V4L2_CID_FLASH_STROBE_STATUS: case V4L2_CID_FLASH_CHARGE: case V4L2_CID_FLASH_READY: case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER: case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE: case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE: case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE: case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE: case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM: case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE: case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL: case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER: case V4L2_CID_MPEG_VIDEO_AU_DELIMITER: case V4L2_CID_WIDE_DYNAMIC_RANGE: case V4L2_CID_IMAGE_STABILIZATION: case V4L2_CID_RDS_RECEPTION: case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO: case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO: case V4L2_CID_RF_TUNER_IF_GAIN_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_PLL_LOCK: case V4L2_CID_RDS_TX_MONO_STEREO: case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD: case V4L2_CID_RDS_TX_COMPRESSED: case V4L2_CID_RDS_TX_DYNAMIC_PTY: case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: case V4L2_CID_RDS_TX_MUSIC_SPEECH: case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE: case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: case V4L2_CID_RDS_RX_MUSIC_SPEECH: *type = V4L2_CTRL_TYPE_BOOLEAN; *min = 0; *max = *step = 1; break; case V4L2_CID_ROTATE: *type = V4L2_CTRL_TYPE_INTEGER; *flags |= V4L2_CTRL_FLAG_MODIFY_LAYOUT; break; case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE: case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE: case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY: case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD: *type = V4L2_CTRL_TYPE_INTEGER; break; case V4L2_CID_MPEG_VIDEO_LTR_COUNT: *type = V4L2_CTRL_TYPE_INTEGER; break; case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX: *type = V4L2_CTRL_TYPE_INTEGER; *flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; break; case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES: *type = V4L2_CTRL_TYPE_BITMASK; *flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; break; case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME: case V4L2_CID_PAN_RESET: case V4L2_CID_TILT_RESET: case V4L2_CID_FLASH_STROBE: case V4L2_CID_FLASH_STROBE_STOP: case V4L2_CID_AUTO_FOCUS_START: case V4L2_CID_AUTO_FOCUS_STOP: case V4L2_CID_DO_WHITE_BALANCE: *type = V4L2_CTRL_TYPE_BUTTON; *flags |= V4L2_CTRL_FLAG_WRITE_ONLY | V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; *min = *max = *step = *def = 0; break; case V4L2_CID_POWER_LINE_FREQUENCY: case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ: case V4L2_CID_MPEG_AUDIO_ENCODING: case V4L2_CID_MPEG_AUDIO_L1_BITRATE: case V4L2_CID_MPEG_AUDIO_L2_BITRATE: case V4L2_CID_MPEG_AUDIO_L3_BITRATE: case V4L2_CID_MPEG_AUDIO_AC3_BITRATE: case V4L2_CID_MPEG_AUDIO_MODE: case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION: case V4L2_CID_MPEG_AUDIO_EMPHASIS: case V4L2_CID_MPEG_AUDIO_CRC: case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK: case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK: case V4L2_CID_MPEG_VIDEO_ENCODING: case V4L2_CID_MPEG_VIDEO_ASPECT: case V4L2_CID_MPEG_VIDEO_BITRATE_MODE: case V4L2_CID_MPEG_STREAM_TYPE: case V4L2_CID_MPEG_STREAM_VBI_FMT: case V4L2_CID_EXPOSURE_AUTO: case V4L2_CID_AUTO_FOCUS_RANGE: case V4L2_CID_COLORFX: case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE: case V4L2_CID_TUNE_PREEMPHASIS: case V4L2_CID_FLASH_LED_MODE: case V4L2_CID_FLASH_STROBE_SOURCE: case V4L2_CID_MPEG_VIDEO_HEADER_MODE: case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE: case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE: case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE: case V4L2_CID_MPEG_VIDEO_H264_LEVEL: case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE: case V4L2_CID_MPEG_VIDEO_H264_PROFILE: case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE: case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE: case V4L2_CID_JPEG_CHROMA_SUBSAMPLING: case V4L2_CID_ISO_SENSITIVITY_AUTO: case V4L2_CID_EXPOSURE_METERING: case V4L2_CID_SCENE_MODE: case V4L2_CID_DV_TX_MODE: case V4L2_CID_DV_TX_RGB_RANGE: case V4L2_CID_DV_TX_IT_CONTENT_TYPE: case V4L2_CID_DV_RX_RGB_RANGE: case V4L2_CID_DV_RX_IT_CONTENT_TYPE: case V4L2_CID_TEST_PATTERN: case V4L2_CID_DEINTERLACING_MODE: case V4L2_CID_TUNE_DEEMPHASIS: case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL: case V4L2_CID_MPEG_VIDEO_VP8_PROFILE: case V4L2_CID_MPEG_VIDEO_VP9_PROFILE: case V4L2_CID_MPEG_VIDEO_VP9_LEVEL: case V4L2_CID_DETECT_MD_MODE: case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE: case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL: case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE: case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE: case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD: case V4L2_CID_MPEG_VIDEO_HEVC_TIER: case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE: case V4L2_CID_MPEG_VIDEO_AV1_PROFILE: case V4L2_CID_MPEG_VIDEO_AV1_LEVEL: case V4L2_CID_STATELESS_HEVC_DECODE_MODE: case V4L2_CID_STATELESS_HEVC_START_CODE: case V4L2_CID_STATELESS_H264_DECODE_MODE: case V4L2_CID_STATELESS_H264_START_CODE: case V4L2_CID_CAMERA_ORIENTATION: case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE: case V4L2_CID_HDR_SENSOR_MODE: *type = V4L2_CTRL_TYPE_MENU; break; case V4L2_CID_LINK_FREQ: *type = V4L2_CTRL_TYPE_INTEGER_MENU; break; case V4L2_CID_RDS_TX_PS_NAME: case V4L2_CID_RDS_TX_RADIO_TEXT: case V4L2_CID_RDS_RX_PS_NAME: case V4L2_CID_RDS_RX_RADIO_TEXT: *type = V4L2_CTRL_TYPE_STRING; break; case V4L2_CID_ISO_SENSITIVITY: case V4L2_CID_AUTO_EXPOSURE_BIAS: case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS: case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES: *type = V4L2_CTRL_TYPE_INTEGER_MENU; break; case V4L2_CID_USER_CLASS: case V4L2_CID_CAMERA_CLASS: case V4L2_CID_CODEC_CLASS: case V4L2_CID_FM_TX_CLASS: case V4L2_CID_FLASH_CLASS: case V4L2_CID_JPEG_CLASS: case V4L2_CID_IMAGE_SOURCE_CLASS: case V4L2_CID_IMAGE_PROC_CLASS: case V4L2_CID_DV_CLASS: case V4L2_CID_FM_RX_CLASS: case V4L2_CID_RF_TUNER_CLASS: case V4L2_CID_DETECT_CLASS: case V4L2_CID_CODEC_STATELESS_CLASS: case V4L2_CID_COLORIMETRY_CLASS: *type = V4L2_CTRL_TYPE_CTRL_CLASS; /* You can neither read nor write these */ *flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY; *min = *max = *step = *def = 0; break; case V4L2_CID_BG_COLOR: case V4L2_CID_COLORFX_RGB: *type = V4L2_CTRL_TYPE_INTEGER; *step = 1; *min = 0; /* Max is calculated as RGB888 that is 2^24 - 1 */ *max = 0xffffff; break; case V4L2_CID_COLORFX_CBCR: *type = V4L2_CTRL_TYPE_INTEGER; *step = 1; *min = 0; *max = 0xffff; break; case V4L2_CID_FLASH_FAULT: case V4L2_CID_JPEG_ACTIVE_MARKER: case V4L2_CID_3A_LOCK: case V4L2_CID_AUTO_FOCUS_STATUS: case V4L2_CID_DV_TX_HOTPLUG: case V4L2_CID_DV_TX_RXSENSE: case V4L2_CID_DV_TX_EDID_PRESENT: case V4L2_CID_DV_RX_POWER_PRESENT: *type = V4L2_CTRL_TYPE_BITMASK; break; case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE: case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT: *type = V4L2_CTRL_TYPE_INTEGER; *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_MPEG_VIDEO_DEC_PTS: *type = V4L2_CTRL_TYPE_INTEGER64; *flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY; *min = *def = 0; *max = 0x1ffffffffLL; *step = 1; break; case V4L2_CID_MPEG_VIDEO_DEC_FRAME: *type = V4L2_CTRL_TYPE_INTEGER64; *flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY; *min = *def = 0; *max = 0x7fffffffffffffffLL; *step = 1; break; case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR: *type = V4L2_CTRL_TYPE_INTEGER64; *min = 0; /* default for 8 bit black, luma is 16, chroma is 128 */ *def = 0x8000800010LL; *max = 0xffffffffffffLL; *step = 1; break; case V4L2_CID_MPEG_VIDEO_AVERAGE_QP: *type = V4L2_CTRL_TYPE_INTEGER; *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_PIXEL_RATE: *type = V4L2_CTRL_TYPE_INTEGER64; *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_DETECT_MD_REGION_GRID: *type = V4L2_CTRL_TYPE_U8; break; case V4L2_CID_DETECT_MD_THRESHOLD_GRID: *type = V4L2_CTRL_TYPE_U16; break; case V4L2_CID_RDS_TX_ALT_FREQS: *type = V4L2_CTRL_TYPE_U32; break; case V4L2_CID_STATELESS_MPEG2_SEQUENCE: *type = V4L2_CTRL_TYPE_MPEG2_SEQUENCE; break; case V4L2_CID_STATELESS_MPEG2_PICTURE: *type = V4L2_CTRL_TYPE_MPEG2_PICTURE; break; case V4L2_CID_STATELESS_MPEG2_QUANTISATION: *type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION; break; case V4L2_CID_STATELESS_FWHT_PARAMS: *type = V4L2_CTRL_TYPE_FWHT_PARAMS; break; case V4L2_CID_STATELESS_H264_SPS: *type = V4L2_CTRL_TYPE_H264_SPS; break; case V4L2_CID_STATELESS_H264_PPS: *type = V4L2_CTRL_TYPE_H264_PPS; break; case V4L2_CID_STATELESS_H264_SCALING_MATRIX: *type = V4L2_CTRL_TYPE_H264_SCALING_MATRIX; break; case V4L2_CID_STATELESS_H264_SLICE_PARAMS: *type = V4L2_CTRL_TYPE_H264_SLICE_PARAMS; break; case V4L2_CID_STATELESS_H264_DECODE_PARAMS: *type = V4L2_CTRL_TYPE_H264_DECODE_PARAMS; break; case V4L2_CID_STATELESS_H264_PRED_WEIGHTS: *type = V4L2_CTRL_TYPE_H264_PRED_WEIGHTS; break; case V4L2_CID_STATELESS_VP8_FRAME: *type = V4L2_CTRL_TYPE_VP8_FRAME; break; case V4L2_CID_STATELESS_HEVC_SPS: *type = V4L2_CTRL_TYPE_HEVC_SPS; break; case V4L2_CID_STATELESS_HEVC_PPS: *type = V4L2_CTRL_TYPE_HEVC_PPS; break; case V4L2_CID_STATELESS_HEVC_SLICE_PARAMS: *type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS; *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY; break; case V4L2_CID_STATELESS_HEVC_SCALING_MATRIX: *type = V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX; break; case V4L2_CID_STATELESS_HEVC_DECODE_PARAMS: *type = V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS; break; case V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS: *type = V4L2_CTRL_TYPE_U32; *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY; break; case V4L2_CID_STATELESS_VP9_COMPRESSED_HDR: *type = V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR; break; case V4L2_CID_STATELESS_VP9_FRAME: *type = V4L2_CTRL_TYPE_VP9_FRAME; break; case V4L2_CID_STATELESS_AV1_SEQUENCE: *type = V4L2_CTRL_TYPE_AV1_SEQUENCE; break; case V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY: *type = V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY; *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY; break; case V4L2_CID_STATELESS_AV1_FRAME: *type = V4L2_CTRL_TYPE_AV1_FRAME; break; case V4L2_CID_STATELESS_AV1_FILM_GRAIN: *type = V4L2_CTRL_TYPE_AV1_FILM_GRAIN; break; case V4L2_CID_UNIT_CELL_SIZE: *type = V4L2_CTRL_TYPE_AREA; *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO: *type = V4L2_CTRL_TYPE_HDR10_CLL_INFO; break; case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY: *type = V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY; break; default: *type = V4L2_CTRL_TYPE_INTEGER; break; } switch (id) { case V4L2_CID_MPEG_AUDIO_ENCODING: case V4L2_CID_MPEG_AUDIO_MODE: case V4L2_CID_MPEG_VIDEO_BITRATE_MODE: case V4L2_CID_MPEG_VIDEO_B_FRAMES: case V4L2_CID_MPEG_STREAM_TYPE: *flags |= V4L2_CTRL_FLAG_UPDATE; break; case V4L2_CID_AUDIO_VOLUME: case V4L2_CID_AUDIO_BALANCE: case V4L2_CID_AUDIO_BASS: case V4L2_CID_AUDIO_TREBLE: case V4L2_CID_BRIGHTNESS: case V4L2_CID_CONTRAST: case V4L2_CID_SATURATION: case V4L2_CID_HUE: case V4L2_CID_RED_BALANCE: case V4L2_CID_BLUE_BALANCE: case V4L2_CID_GAMMA: case V4L2_CID_SHARPNESS: case V4L2_CID_CHROMA_GAIN: case V4L2_CID_RDS_TX_DEVIATION: case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: case V4L2_CID_AUDIO_LIMITER_DEVIATION: case V4L2_CID_AUDIO_COMPRESSION_GAIN: case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD: case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME: case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME: case V4L2_CID_PILOT_TONE_DEVIATION: case V4L2_CID_PILOT_TONE_FREQUENCY: case V4L2_CID_TUNE_POWER_LEVEL: case V4L2_CID_TUNE_ANTENNA_CAPACITOR: case V4L2_CID_RF_TUNER_RF_GAIN: case V4L2_CID_RF_TUNER_LNA_GAIN: case V4L2_CID_RF_TUNER_MIXER_GAIN: case V4L2_CID_RF_TUNER_IF_GAIN: case V4L2_CID_RF_TUNER_BANDWIDTH: case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: *flags |= V4L2_CTRL_FLAG_SLIDER; break; case V4L2_CID_PAN_RELATIVE: case V4L2_CID_TILT_RELATIVE: case V4L2_CID_FOCUS_RELATIVE: case V4L2_CID_IRIS_RELATIVE: case V4L2_CID_ZOOM_RELATIVE: *flags |= V4L2_CTRL_FLAG_WRITE_ONLY | V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; break; case V4L2_CID_FLASH_STROBE_STATUS: case V4L2_CID_AUTO_FOCUS_STATUS: case V4L2_CID_FLASH_READY: case V4L2_CID_DV_TX_HOTPLUG: case V4L2_CID_DV_TX_RXSENSE: case V4L2_CID_DV_TX_EDID_PRESENT: case V4L2_CID_DV_RX_POWER_PRESENT: case V4L2_CID_DV_RX_IT_CONTENT_TYPE: case V4L2_CID_RDS_RX_PTY: case V4L2_CID_RDS_RX_PS_NAME: case V4L2_CID_RDS_RX_RADIO_TEXT: case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: case V4L2_CID_RDS_RX_MUSIC_SPEECH: case V4L2_CID_CAMERA_ORIENTATION: case V4L2_CID_CAMERA_SENSOR_ROTATION: *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_RF_TUNER_PLL_LOCK: *flags |= V4L2_CTRL_FLAG_VOLATILE; break; } } EXPORT_SYMBOL(v4l2_ctrl_fill); |
| 6 172 66 81 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/pagemap.h> #include <linux/blkdev.h> #include "../blk.h" /* * add_gd_partition adds a partitions details to the devices partition * description. */ struct parsed_partitions { struct gendisk *disk; char name[BDEVNAME_SIZE]; struct { sector_t from; sector_t size; int flags; bool has_info; struct partition_meta_info info; } *parts; int next; int limit; bool access_beyond_eod; char *pp_buf; }; typedef struct { struct folio *v; } Sector; void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p); static inline void put_dev_sector(Sector p) { folio_put(p.v); } static inline void put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) { if (n < p->limit) { char tmp[1 + BDEVNAME_SIZE + 10 + 1]; p->parts[n].from = from; p->parts[n].size = size; snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); strlcat(p->pp_buf, tmp, PAGE_SIZE); } } /* detection routines go here in alphabetical order: */ int adfspart_check_ADFS(struct parsed_partitions *state); int adfspart_check_CUMANA(struct parsed_partitions *state); int adfspart_check_EESOX(struct parsed_partitions *state); int adfspart_check_ICS(struct parsed_partitions *state); int adfspart_check_POWERTEC(struct parsed_partitions *state); int aix_partition(struct parsed_partitions *state); int amiga_partition(struct parsed_partitions *state); int atari_partition(struct parsed_partitions *state); int cmdline_partition(struct parsed_partitions *state); int efi_partition(struct parsed_partitions *state); int ibm_partition(struct parsed_partitions *); int karma_partition(struct parsed_partitions *state); int ldm_partition(struct parsed_partitions *state); int mac_partition(struct parsed_partitions *state); int msdos_partition(struct parsed_partitions *state); int of_partition(struct parsed_partitions *state); int osf_partition(struct parsed_partitions *state); int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); int sysv68_partition(struct parsed_partitions *state); int ultrix_partition(struct parsed_partitions *state); |
| 235 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tlb #if !defined(_TRACE_TLB_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TLB_H #include <linux/mm_types.h> #include <linux/tracepoint.h> #define TLB_FLUSH_REASON \ EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \ EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \ EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \ EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \ EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" ) /* * First define the enums in TLB_FLUSH_REASON to be exported to userspace * via TRACE_DEFINE_ENUM(). */ #undef EM #undef EMe #define EM(a,b) TRACE_DEFINE_ENUM(a); #define EMe(a,b) TRACE_DEFINE_ENUM(a); TLB_FLUSH_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a,b) { a, b }, #define EMe(a,b) { a, b } TRACE_EVENT(tlb_flush, TP_PROTO(int reason, unsigned long pages), TP_ARGS(reason, pages), TP_STRUCT__entry( __field( int, reason) __field(unsigned long, pages) ), TP_fast_assign( __entry->reason = reason; __entry->pages = pages; ), TP_printk("pages:%ld reason:%s (%d)", __entry->pages, __print_symbolic(__entry->reason, TLB_FLUSH_REASON), __entry->reason) ); #endif /* _TRACE_TLB_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 105 106 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_SYNPROXY_SHARED_H #define _NF_SYNPROXY_SHARED_H #include <linux/module.h> #include <linux/skbuff.h> #include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/tcp.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; unsigned int cookie_valid; unsigned int cookie_retrans; unsigned int conn_reopened; }; struct synproxy_net { struct nf_conn *tmpl; struct synproxy_stats __percpu *stats; unsigned int hook_ref4; unsigned int hook_ref6; }; extern unsigned int synproxy_net_id; static inline struct synproxy_net *synproxy_pernet(struct net *net) { return net_generic(net, synproxy_net_id); } struct synproxy_options { u8 options; u8 wscale; u16 mss_option; u16 mss_encode; u32 tsval; u32 tsecr; }; struct nf_synproxy_info; bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts); void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts); void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts); bool synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq); struct nf_hook_state; unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs); int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net); void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net); #if IS_ENABLED(CONFIG_IPV6) void synproxy_send_client_synack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts); bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq); unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs); int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net); void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net); #else static inline int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { return 0; } static inline void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) {}; #endif /* CONFIG_IPV6 */ #endif /* _NF_SYNPROXY_SHARED_H */ |
| 62 62 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 | // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 debugfs for wireless PHYs * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2018 - 2019, 2021-2025 Intel Corporation */ #include <linux/debugfs.h> #include <linux/rtnetlink.h> #include <linux/vmalloc.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "debugfs.h" #define DEBUGFS_FORMAT_BUFFER_SIZE 100 int mac80211_format_buffer(char __user *userbuf, size_t count, loff_t *ppos, char *fmt, ...) { va_list args; char buf[DEBUGFS_FORMAT_BUFFER_SIZE]; int res; va_start(args, fmt); res = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); return simple_read_from_buffer(userbuf, count, ppos, buf, res); } #define DEBUGFS_READONLY_FILE_FN(name, fmt, value...) \ static ssize_t name## _read(struct file *file, char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ struct ieee80211_local *local = file->private_data; \ \ return mac80211_format_buffer(userbuf, count, ppos, \ fmt "\n", ##value); \ } #define DEBUGFS_READONLY_FILE_OPS(name) \ static const struct debugfs_short_fops name## _ops = { \ .read = name## _read, \ .llseek = generic_file_llseek, \ }; #define DEBUGFS_READONLY_FILE(name, fmt, value...) \ DEBUGFS_READONLY_FILE_FN(name, fmt, value) \ DEBUGFS_READONLY_FILE_OPS(name) #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, phyd, local, &name## _ops) #define DEBUGFS_ADD_MODE(name, mode) \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); DEBUGFS_READONLY_FILE(hw_conf, "%x", local->hw.conf.flags); DEBUGFS_READONLY_FILE(user_power, "%d", local->user_power_level); DEBUGFS_READONLY_FILE(power, "%d", local->hw.conf.power_level); DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, "%#08x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); static ssize_t aqm_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; struct fq *fq = &local->fq; char buf[200]; int len = 0; spin_lock_bh(&local->fq.lock); len = scnprintf(buf, sizeof(buf), "access name value\n" "R fq_flows_cnt %u\n" "R fq_backlog %u\n" "R fq_overlimit %u\n" "R fq_overmemory %u\n" "R fq_collisions %u\n" "R fq_memory_usage %u\n" "RW fq_memory_limit %u\n" "RW fq_limit %u\n" "RW fq_quantum %u\n", fq->flows_cnt, fq->backlog, fq->overmemory, fq->overlimit, fq->collisions, fq->memory_usage, fq->memory_limit, fq->limit, fq->quantum); spin_unlock_bh(&local->fq.lock); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static ssize_t aqm_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[100]; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; if (count && buf[count - 1] == '\n') buf[count - 1] = '\0'; else buf[count] = '\0'; if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1) return count; else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1) return count; return -EINVAL; } static const struct debugfs_short_fops aqm_ops = { .write = aqm_write, .read = aqm_read, .llseek = default_llseek, }; static ssize_t airtime_flags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[128] = {}, *pos, *end; pos = buf; end = pos + sizeof(buf) - 1; if (local->airtime_flags & AIRTIME_USE_TX) pos += scnprintf(pos, end - pos, "AIRTIME_TX\t(%lx)\n", AIRTIME_USE_TX); if (local->airtime_flags & AIRTIME_USE_RX) pos += scnprintf(pos, end - pos, "AIRTIME_RX\t(%lx)\n", AIRTIME_USE_RX); return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); } static ssize_t airtime_flags_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[16]; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; if (count && buf[count - 1] == '\n') buf[count - 1] = '\0'; else buf[count] = '\0'; if (kstrtou16(buf, 0, &local->airtime_flags)) return -EINVAL; return count; } static const struct debugfs_short_fops airtime_flags_ops = { .write = airtime_flags_write, .read = airtime_flags_read, .llseek = default_llseek, }; static ssize_t aql_pending_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[400]; int len = 0; len = scnprintf(buf, sizeof(buf), "AC AQL pending\n" "VO %u us\n" "VI %u us\n" "BE %u us\n" "BK %u us\n" "total %u us\n", atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]), atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]), atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]), atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]), atomic_read(&local->aql_total_pending_airtime)); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static const struct debugfs_short_fops aql_pending_ops = { .read = aql_pending_read, .llseek = default_llseek, }; static ssize_t aql_txq_limit_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[400]; int len = 0; len = scnprintf(buf, sizeof(buf), "AC AQL limit low AQL limit high\n" "VO %u %u\n" "VI %u %u\n" "BE %u %u\n" "BK %u %u\n", local->aql_txq_limit_low[IEEE80211_AC_VO], local->aql_txq_limit_high[IEEE80211_AC_VO], local->aql_txq_limit_low[IEEE80211_AC_VI], local->aql_txq_limit_high[IEEE80211_AC_VI], local->aql_txq_limit_low[IEEE80211_AC_BE], local->aql_txq_limit_high[IEEE80211_AC_BE], local->aql_txq_limit_low[IEEE80211_AC_BK], local->aql_txq_limit_high[IEEE80211_AC_BK]); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static ssize_t aql_txq_limit_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[100]; u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old; struct sta_info *sta; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; if (count && buf[count - 1] == '\n') buf[count - 1] = '\0'; else buf[count] = '\0'; if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) return -EINVAL; if (ac >= IEEE80211_NUM_ACS) return -EINVAL; q_limit_low_old = local->aql_txq_limit_low[ac]; q_limit_high_old = local->aql_txq_limit_high[ac]; guard(wiphy)(local->hw.wiphy); local->aql_txq_limit_low[ac] = q_limit_low; local->aql_txq_limit_high[ac] = q_limit_high; list_for_each_entry(sta, &local->sta_list, list) { /* If a sta has customized queue limits, keep it */ if (sta->airtime[ac].aql_limit_low == q_limit_low_old && sta->airtime[ac].aql_limit_high == q_limit_high_old) { sta->airtime[ac].aql_limit_low = q_limit_low; sta->airtime[ac].aql_limit_high = q_limit_high; } } return count; } static const struct debugfs_short_fops aql_txq_limit_ops = { .write = aql_txq_limit_write, .read = aql_txq_limit_read, .llseek = default_llseek, }; static ssize_t aql_enable_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[3]; int len; len = scnprintf(buf, sizeof(buf), "%d\n", !static_key_false(&aql_disable.key)); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { bool aql_disabled = static_key_false(&aql_disable.key); char buf[3]; size_t len; if (count > sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; buf[sizeof(buf) - 1] = '\0'; len = strlen(buf); if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = 0; if (buf[0] == '0' && buf[1] == '\0') { if (!aql_disabled) static_branch_inc(&aql_disable); } else if (buf[0] == '1' && buf[1] == '\0') { if (aql_disabled) static_branch_dec(&aql_disable); } else { return -EINVAL; } return count; } static const struct debugfs_short_fops aql_enable_ops = { .write = aql_enable_write, .read = aql_enable_read, .llseek = default_llseek, }; static ssize_t force_tx_status_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[3]; int len = 0; len = scnprintf(buf, sizeof(buf), "%d\n", (int)local->force_tx_status); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } static ssize_t force_tx_status_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[3]; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; if (count && buf[count - 1] == '\n') buf[count - 1] = '\0'; else buf[count] = '\0'; if (buf[0] == '0' && buf[1] == '\0') local->force_tx_status = 0; else if (buf[0] == '1' && buf[1] == '\0') local->force_tx_status = 1; else return -EINVAL; return count; } static const struct debugfs_short_fops force_tx_status_ops = { .write = force_tx_status_write, .read = force_tx_status_read, .llseek = default_llseek, }; #ifdef CONFIG_PM static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; int ret; rtnl_lock(); wiphy_lock(local->hw.wiphy); __ieee80211_suspend(&local->hw, NULL); ret = __ieee80211_resume(&local->hw); wiphy_unlock(local->hw.wiphy); if (ret) cfg80211_shutdown_all_interfaces(local->hw.wiphy); rtnl_unlock(); return count; } static const struct debugfs_short_fops reset_ops = { .write = reset_write, .llseek = noop_llseek, }; #endif static const char *hw_flag_names[] = { #define FLAG(F) [IEEE80211_HW_##F] = #F FLAG(HAS_RATE_CONTROL), FLAG(RX_INCLUDES_FCS), FLAG(HOST_BROADCAST_PS_BUFFERING), FLAG(SIGNAL_UNSPEC), FLAG(SIGNAL_DBM), FLAG(NEED_DTIM_BEFORE_ASSOC), FLAG(SPECTRUM_MGMT), FLAG(AMPDU_AGGREGATION), FLAG(SUPPORTS_PS), FLAG(PS_NULLFUNC_STACK), FLAG(SUPPORTS_DYNAMIC_PS), FLAG(MFP_CAPABLE), FLAG(WANT_MONITOR_VIF), FLAG(NO_VIRTUAL_MONITOR), FLAG(NO_AUTO_VIF), FLAG(SW_CRYPTO_CONTROL), FLAG(SUPPORT_FAST_XMIT), FLAG(REPORTS_TX_ACK_STATUS), FLAG(CONNECTION_MONITOR), FLAG(QUEUE_CONTROL), FLAG(SUPPORTS_PER_STA_GTK), FLAG(AP_LINK_PS), FLAG(TX_AMPDU_SETUP_IN_HW), FLAG(SUPPORTS_RC_TABLE), FLAG(P2P_DEV_ADDR_FOR_INTF), FLAG(TIMING_BEACON_ONLY), FLAG(SUPPORTS_HT_CCK_RATES), FLAG(CHANCTX_STA_CSA), FLAG(SUPPORTS_CLONED_SKBS), FLAG(SINGLE_SCAN_ON_ALL_BANDS), FLAG(TDLS_WIDER_BW), FLAG(SUPPORTS_AMSDU_IN_AMPDU), FLAG(BEACON_TX_STATUS), FLAG(NEEDS_UNIQUE_STA_ADDR), FLAG(SUPPORTS_REORDERING_BUFFER), FLAG(USES_RSS), FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), FLAG(DOESNT_SUPPORT_QOS_NDP), FLAG(BUFF_MMPDU_TXQ), FLAG(SUPPORTS_VHT_EXT_NSS_BW), FLAG(STA_MMPDU_TXQ), FLAG(TX_STATUS_NO_AMPDU_LEN), FLAG(SUPPORTS_MULTI_BSSID), FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID), FLAG(AMPDU_KEYBORDER_SUPPORT), FLAG(SUPPORTS_TX_ENCAP_OFFLOAD), FLAG(SUPPORTS_RX_DECAP_OFFLOAD), FLAG(SUPPORTS_CONC_MON_RX_DECAP), FLAG(DETECTS_COLOR_COLLISION), FLAG(MLO_MCAST_MULTI_LINK_TX), FLAG(DISALLOW_PUNCTURING), FLAG(HANDLES_QUIET_CSA), FLAG(STRICT), #undef FLAG }; static ssize_t hwflags_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; size_t bufsz = 30 * NUM_IEEE80211_HW_FLAGS; char *buf = kzalloc(bufsz, GFP_KERNEL); char *pos = buf, *end = buf + bufsz - 1; ssize_t rv; int i; if (!buf) return -ENOMEM; /* fail compilation if somebody adds or removes * a flag without updating the name array above */ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS); for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { if (test_bit(i, local->hw.flags)) pos += scnprintf(pos, end - pos, "%s\n", hw_flag_names[i]); } rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); return rv; } static ssize_t hwflags_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; char buf[100]; int val; if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; if (count && buf[count - 1] == '\n') buf[count - 1] = '\0'; else buf[count] = '\0'; if (sscanf(buf, "strict=%d", &val) == 1) { switch (val) { case 0: ieee80211_hw_set(&local->hw, STRICT); return count; case 1: __clear_bit(IEEE80211_HW_STRICT, local->hw.flags); return count; default: return -EINVAL; } } return -EINVAL; } static const struct file_operations hwflags_ops = { .open = simple_open, .read = hwflags_read, .write = hwflags_write, }; static ssize_t misc_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; /* Max len of each line is 16 characters, plus 9 for 'pending:\n' */ size_t bufsz = IEEE80211_MAX_QUEUES * 16 + 9; char *buf; char *pos, *end; ssize_t rv; int i; int ln; buf = kzalloc(bufsz, GFP_KERNEL); if (!buf) return -ENOMEM; pos = buf; end = buf + bufsz - 1; pos += scnprintf(pos, end - pos, "pending:\n"); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { ln = skb_queue_len(&local->pending[i]); pos += scnprintf(pos, end - pos, "[%i] %d\n", i, ln); } rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); return rv; } static ssize_t queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; unsigned long flags; char buf[IEEE80211_MAX_QUEUES * 20]; int q, res = 0; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (q = 0; q < local->hw.queues; q++) res += sprintf(buf + res, "%02d: %#.8lx/%d\n", q, local->queue_stop_reasons[q], skb_queue_len(&local->pending[q])); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); return simple_read_from_buffer(user_buf, count, ppos, buf, res); } DEBUGFS_READONLY_FILE_OPS(queues); DEBUGFS_READONLY_FILE_OPS(misc); /* statistics stuff */ static ssize_t format_devstat_counter(struct ieee80211_local *local, char __user *userbuf, size_t count, loff_t *ppos, int (*printvalue)(struct ieee80211_low_level_stats *stats, char *buf, int buflen)) { struct ieee80211_low_level_stats stats; char buf[20]; int res; wiphy_lock(local->hw.wiphy); res = drv_get_stats(local, &stats); wiphy_unlock(local->hw.wiphy); if (res) return res; res = printvalue(&stats, buf, sizeof(buf)); return simple_read_from_buffer(userbuf, count, ppos, buf, res); } #define DEBUGFS_DEVSTATS_FILE(name) \ static int print_devstats_##name(struct ieee80211_low_level_stats *stats,\ char *buf, int buflen) \ { \ return scnprintf(buf, buflen, "%u\n", stats->name); \ } \ static ssize_t stats_ ##name## _read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ return format_devstat_counter(file->private_data, \ userbuf, \ count, \ ppos, \ print_devstats_##name); \ } \ \ static const struct debugfs_short_fops stats_ ##name## _ops = { \ .read = stats_ ##name## _read, \ .llseek = generic_file_llseek, \ }; #ifdef CONFIG_MAC80211_DEBUG_COUNTERS #define DEBUGFS_STATS_ADD(name) \ debugfs_create_u32(#name, 0400, statsd, &local->name); #endif #define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount); DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount); DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount); void debugfs_hw_add(struct ieee80211_local *local) { struct dentry *phyd = local->hw.wiphy->debugfsdir; struct dentry *statsd; if (!phyd) return; local->debugfs.keys = debugfs_create_dir("keys", phyd); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(rate_ctrl_alg); DEBUGFS_ADD(queues); DEBUGFS_ADD(misc); #ifdef CONFIG_PM DEBUGFS_ADD_MODE(reset, 0200); #endif DEBUGFS_ADD_MODE(hwflags, 0600); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); DEBUGFS_ADD(hw_conf); DEBUGFS_ADD_MODE(force_tx_status, 0600); DEBUGFS_ADD_MODE(aql_enable, 0600); DEBUGFS_ADD(aql_pending); DEBUGFS_ADD_MODE(aqm, 0600); DEBUGFS_ADD_MODE(airtime_flags, 0600); DEBUGFS_ADD(aql_txq_limit); debugfs_create_u32("aql_threshold", 0600, phyd, &local->aql_threshold); statsd = debugfs_create_dir("statistics", phyd); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount); DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount); DEBUGFS_STATS_ADD(dot11FailedCount); DEBUGFS_STATS_ADD(dot11RetryCount); DEBUGFS_STATS_ADD(dot11MultipleRetryCount); DEBUGFS_STATS_ADD(dot11FrameDuplicateCount); DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount); DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount); DEBUGFS_STATS_ADD(dot11TransmittedFrameCount); DEBUGFS_STATS_ADD(tx_handlers_queued); DEBUGFS_STATS_ADD(tx_handlers_drop_wep); DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); DEBUGFS_STATS_ADD(rx_handlers_drop); DEBUGFS_STATS_ADD(rx_handlers_queued); DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); DEBUGFS_STATS_ADD(tx_expand_skb_head); DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag); DEBUGFS_STATS_ADD(rx_handlers_fragments); DEBUGFS_STATS_ADD(tx_status_drop); #endif DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); } |
| 91 110 155 46 46 24 204 204 7 202 2 1212 1210 638 519 67 138 87 626 627 632 77 124 587 124 589 129 127 2 128 129 427 4 4 4 446 446 129 129 637 628 96 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux network device link state notification * * Author: * Stefan Rompf <sux@loplof.de> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/types.h> #include "dev.h" enum lw_bits { LW_URGENT = 0, }; static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; /* Some uppers (DSA) have additional sources for being down, so * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { struct net_device *peer; int iflink; /* If called from netdev_run_todo()/linkwatch_sync_dev(), * dev_net(dev) can be already freed, and RTNL is not held. */ if (dev->reg_state <= NETREG_REGISTERED) iflink = dev_get_iflink(dev); else iflink = dev->ifindex; if (iflink == dev->ifindex) return IF_OPER_DOWN; ASSERT_RTNL(); peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; return netif_carrier_ok(peer) ? IF_OPER_DOWN : IF_OPER_LOWERLAYERDOWN; } if (netif_dormant(dev)) return IF_OPER_DORMANT; return IF_OPER_UP; } static void rfc2863_policy(struct net_device *dev) { unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) operstate = IF_OPER_TESTING; break; case IF_LINK_MODE_DORMANT: if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; break; case IF_LINK_MODE_DEFAULT: default: break; } WRITE_ONCE(dev->operstate, operstate); } void linkwatch_init_dev(struct net_device *dev) { /* Handle pre-registration link state changes */ if (!netif_carrier_ok(dev) || netif_dormant(dev) || netif_testing(dev)) rfc2863_policy(dev); } static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) return false; if (dev->ifindex != dev_get_iflink(dev)) return true; if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } static void linkwatch_add_event(struct net_device *dev) { unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } static void linkwatch_schedule_work(int urgent) { unsigned long delay = linkwatch_nextevent - jiffies; if (test_bit(LW_URGENT, &linkwatch_flags)) return; /* Minimise down-time: drop delay for up event. */ if (urgent) { if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) return; delay = 0; } /* If we wrap around we'll delay it by at most HZ. */ if (delay > HZ) delay = 0; /* * If urgent, schedule immediate execution; otherwise, don't * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) mod_delayed_work(system_dfl_wq, &linkwatch_work, 0); else queue_delayed_work(system_dfl_wq, &linkwatch_work, delay); } static void linkwatch_do_dev(struct net_device *dev) { /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else dev_deactivate(dev); netif_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). */ __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) { #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; /* Use a local list here since we add non-urgent * events back to the global one when called with * urgent_only=1. */ LIST_HEAD(wrk); /* Give urgent case more budget */ if (urgent_only) do_dev += MAX_DO_DEV_PER_LOOP; /* * Limit the number of linkwatch events to one * per second so that a runaway driver does not * cause a storm of messages on the netlink * socket. This limit does not apply to up events * while the device qdisc is down. */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; /* Limit wrap-around effect on delay. */ else if (time_after(linkwatch_nextevent, jiffies + HZ)) linkwatch_nextevent = jiffies; clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); if (!netif_device_present(dev) || (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } /* We must free netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); netdev_lock_ops(dev); linkwatch_do_dev(dev); netdev_unlock_ops(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } /* Add the remaining work back to lweventlist */ list_splice_init(&wrk, &lweventlist); if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); } static bool linkwatch_clean_dev(struct net_device *dev) { unsigned long flags; bool clean = false; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = true; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); return clean; } void __linkwatch_sync_dev(struct net_device *dev) { netdev_ops_assert_locked(dev); if (linkwatch_clean_dev(dev)) linkwatch_do_dev(dev); } void linkwatch_sync_dev(struct net_device *dev) { if (linkwatch_clean_dev(dev)) { netdev_lock_ops(dev); linkwatch_do_dev(dev); netdev_unlock_ops(dev); } } /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { __linkwatch_run_queue(0); } static void linkwatch_event(struct work_struct *dummy) { rtnl_lock(); __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { linkwatch_add_event(dev); } else if (!urgent) return; linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event); |
| 5 24 20 5 39 39 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "cookie.h" #include "peer.h" #include "device.h" #include "messages.h" #include "ratelimiter.h" #include "timers.h" #include <crypto/blake2s.h> #include <crypto/chacha20poly1305.h> #include <crypto/utils.h> #include <net/ipv6.h> void wg_cookie_checker_init(struct cookie_checker *checker, struct wg_device *wg) { init_rwsem(&checker->secret_lock); checker->secret_birthdate = ktime_get_coarse_boottime_ns(); get_random_bytes(checker->secret, NOISE_HASH_LEN); checker->device = wg; } enum { COOKIE_KEY_LABEL_LEN = 8 }; static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] __nonstring = "mac1----"; static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] __nonstring = "cookie--"; static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 pubkey[NOISE_PUBLIC_KEY_LEN], const u8 label[COOKIE_KEY_LABEL_LEN]) { struct blake2s_state blake; blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); blake2s_final(&blake, key); } /* Must hold peer->handshake.static_identity->lock */ void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) { if (likely(checker->device->static_identity.has_identity)) { precompute_key(checker->cookie_encryption_key, checker->device->static_identity.static_public, cookie_key_label); precompute_key(checker->message_mac1_key, checker->device->static_identity.static_public, mac1_key_label); } else { memset(checker->cookie_encryption_key, 0, NOISE_SYMMETRIC_KEY_LEN); memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); } } void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) { precompute_key(peer->latest_cookie.cookie_decryption_key, peer->handshake.remote_static, cookie_key_label); precompute_key(peer->latest_cookie.message_mac1_key, peer->handshake.remote_static, mac1_key_label); } void wg_cookie_init(struct cookie *cookie) { memset(cookie, 0, sizeof(*cookie)); init_rwsem(&cookie->lock); } static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, const u8 key[NOISE_SYMMETRIC_KEY_LEN]) { len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac1); blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); } static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, const u8 cookie[COOKIE_LEN]) { len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac2); blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); } static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, struct cookie_checker *checker) { struct blake2s_state state; if (wg_birthdate_has_expired(checker->secret_birthdate, COOKIE_SECRET_MAX_AGE)) { down_write(&checker->secret_lock); checker->secret_birthdate = ktime_get_coarse_boottime_ns(); get_random_bytes(checker->secret, NOISE_HASH_LEN); up_write(&checker->secret_lock); } down_read(&checker->secret_lock); blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); if (skb->protocol == htons(ETH_P_IP)) blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, sizeof(struct in_addr)); else if (skb->protocol == htons(ETH_P_IPV6)) blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, sizeof(struct in6_addr)); blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); blake2s_final(&state, cookie); up_read(&checker->secret_lock); } enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, bool check_cookie) { struct message_macs *macs = (struct message_macs *) (skb->data + skb->len - sizeof(*macs)); enum cookie_mac_state ret; u8 computed_mac[COOKIE_LEN]; u8 cookie[COOKIE_LEN]; ret = INVALID_MAC; compute_mac1(computed_mac, skb->data, skb->len, checker->message_mac1_key); if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) goto out; ret = VALID_MAC_BUT_NO_COOKIE; if (!check_cookie) goto out; make_cookie(cookie, skb, checker); compute_mac2(computed_mac, skb->data, skb->len, cookie); if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) goto out; ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) goto out; ret = VALID_MAC_WITH_COOKIE; out: return ret; } void wg_cookie_add_mac_to_packet(void *message, size_t len, struct wg_peer *peer) { struct message_macs *macs = (struct message_macs *) ((u8 *)message + len - sizeof(*macs)); down_write(&peer->latest_cookie.lock); compute_mac1(macs->mac1, message, len, peer->latest_cookie.message_mac1_key); memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); peer->latest_cookie.have_sent_mac1 = true; up_write(&peer->latest_cookie.lock); down_read(&peer->latest_cookie.lock); if (peer->latest_cookie.is_valid && !wg_birthdate_has_expired(peer->latest_cookie.birthdate, COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) compute_mac2(macs->mac2, message, len, peer->latest_cookie.cookie); else memset(macs->mac2, 0, COOKIE_LEN); up_read(&peer->latest_cookie.lock); } void wg_cookie_message_create(struct message_handshake_cookie *dst, struct sk_buff *skb, __le32 index, struct cookie_checker *checker) { struct message_macs *macs = (struct message_macs *) ((u8 *)skb->data + skb->len - sizeof(*macs)); u8 cookie[COOKIE_LEN]; dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); dst->receiver_index = index; get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); make_cookie(cookie, skb, checker); xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, macs->mac1, COOKIE_LEN, dst->nonce, checker->cookie_encryption_key); } void wg_cookie_message_consume(struct message_handshake_cookie *src, struct wg_device *wg) { struct wg_peer *peer = NULL; u8 cookie[COOKIE_LEN]; bool ret; if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE | INDEX_HASHTABLE_KEYPAIR, src->receiver_index, &peer))) return; down_read(&peer->latest_cookie.lock); if (unlikely(!peer->latest_cookie.have_sent_mac1)) { up_read(&peer->latest_cookie.lock); goto out; } ret = xchacha20poly1305_decrypt( cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, peer->latest_cookie.cookie_decryption_key); up_read(&peer->latest_cookie.lock); if (ret) { down_write(&peer->latest_cookie.lock); memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); peer->latest_cookie.is_valid = true; peer->latest_cookie.have_sent_mac1 = false; up_write(&peer->latest_cookie.lock); } else { net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", wg->dev->name); } out: wg_peer_put(peer); } |
| 16 3 9 6 4 2 4 7 6 46 38 9 1 46 2 51 52 53 2 6 11 31 33 1 1 3 2 26 22 2 20 1 15 3 57 57 57 13 1 1 1 39 1 51 33 3 6 41 28 968 972 149 819 71 2 2 2 3 2 2 1 1 1 2 2 1 2 2 1 3 3 3 1 3 1 2 1 4 2 2 3 4 1 14 2 1 1 5 2 1 2 1 2 1 1 1 558 2 2 1 7 4 4 2 1 2 2 3 2 1 1 1 1 2 1 2 1 2 3 2 1 1 1 2 3 1 1 3 1 1 2 2 2 2 2 2 2 2 1 2 2 1 3 2 3 2 4 2 2 1 6 2 3 1 1 21 5 3 1 1 2 3 34 9 2 22 33 3 6 6 98 1 40 1 1035 16 54 617 349 5 1 1 1 1 1 2 2 13 1 11 1 4 6 143 146 10 132 34 5 3 1 13 1 1 24 6 23 8 22 8 22 8 1 20 10 24 6 2 1 1 1 1 1 1 1 1 5 2 3 2 1 1 1 1 1 1 1 1 4 1 3 1 2 1 1 1 2 1 2 2 1 1 2 1 1 1 1 1 1 2 1 3 1 2 2 1 1 1 1 36 35 179 4 32 114 2 24 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 | // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 BSD socket options interface * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/net/ipv4/ip_sockglue.c * * FIXME: Make the setsockopt code POSIX compliant: That is * * o Truncate getsockopt returns * o Return an optlen of the truncated length if need be * * Changes: * David L Stevens <dlstevens@us.ibm.com>: * - added multicast source filtering API for MLDv2 */ #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/mroute6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/netfilter.h> #include <linux/slab.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/inet_common.h> #include <net/tcp.h> #include <net/udp.h> #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> #include <net/seg6.h> #include <net/psp.h> #include <linux/uaccess.h> struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); DEFINE_STATIC_KEY_FALSE(ip6_min_hopcount); int ip6_ra_control(struct sock *sk, int sel) { struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW) return -ENOPROTOOPT; new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; if (sel >= 0 && !new_ra) return -ENOMEM; write_lock_bh(&ip6_ra_lock); for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (sel >= 0) { write_unlock_bh(&ip6_ra_lock); kfree(new_ra); return -EADDRINUSE; } *rap = ra->next; write_unlock_bh(&ip6_ra_lock); sock_put(sk); kfree(ra); return 0; } } if (!new_ra) { write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->sel = sel; new_ra->next = ra; *rap = new_ra; sock_hold(sk); write_unlock_bh(&ip6_ra_lock); return 0; } struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt) { if (inet_test_bit(IS_ICSK, sk)) { if (opt && !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) { struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ext_hdr_len = psp_sk_overhead(sk) + opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt))); sk_dst_reset(sk); return opt; } static int copy_group_source_from_sockptr(struct group_source_req *greqs, sockptr_t optval, int optlen) { if (in_compat_syscall()) { struct compat_group_source_req gr32; if (optlen < sizeof(gr32)) return -EINVAL; if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; greqs->gsr_interface = gr32.gsr_interface; greqs->gsr_group = gr32.gsr_group; greqs->gsr_source = gr32.gsr_source; } else { if (optlen < sizeof(*greqs)) return -EINVAL; if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) return -EFAULT; } return 0; } static int do_ipv6_mcast_group_source(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct group_source_req greqs; int omode, add; int ret; ret = copy_group_source_from_sockptr(&greqs, optval, optlen); if (ret) return ret; if (greqs.gsr_group.ss_family != AF_INET6 || greqs.gsr_source.ss_family != AF_INET6) return -EADDRNOTAVAIL; if (optname == MCAST_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == MCAST_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 0; } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct sockaddr_in6 *psin6; int retv; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, &psin6->sin6_addr, MCAST_INCLUDE); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) return retv; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } return ip6_mc_source(add, omode, sk, &greqs); } static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { struct group_filter *gsf; int ret; if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); if (IS_ERR(gsf)) return PTR_ERR(gsf); /* numsrc >= (4G-140)/128 overflow in 32 bits */ ret = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffffU || gsf->gf_numsrc > sysctl_mld_max_msf) goto out_free_gsf; ret = -EINVAL; if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; } static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; int n; if (optlen < size0) return -EINVAL; if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; /* numsrc >= (4G-140)/128 overflow in 32 bits */ ret = -ENOBUFS; n = gf32->gf_numsrc; if (n >= 0x1ffffffU || n > sysctl_mld_max_msf) goto out_free_p; ret = -EINVAL; if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); return ret; } static int ipv6_mcast_join_leave(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct sockaddr_in6 *psin6; struct group_req greq; if (optlen < sizeof(greq)) return -EINVAL; if (copy_from_sockptr(&greq, optval, sizeof(greq))) return -EFAULT; if (greq.gr_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) return ipv6_sock_mc_join(sk, greq.gr_interface, &psin6->sin6_addr); return ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); } static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct compat_group_req gr32; struct sockaddr_in6 *psin6; if (optlen < sizeof(gr32)) return -EINVAL; if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; if (gr32.gr_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; psin6 = (struct sockaddr_in6 *)&gr32.gr_group; if (optname == MCAST_JOIN_GROUP) return ipv6_sock_mc_join(sk, gr32.gr_interface, &psin6->sin6_addr); return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr); } static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_opt_hdr *new = NULL; struct net *net = sock_net(sk); struct ipv6_txoptions *opt; int err; /* hop-by-hop / destination options are privileged option */ if (optname != IPV6_RTHDR && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; /* remove any sticky options header with a zero option * length, per RFC3542. */ if (optlen > 0) { if (sockptr_is_null(optval)) return -EINVAL; if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) return -EINVAL; new = memdup_sockptr(optval, optlen); if (IS_ERR(new)) return PTR_ERR(new); if (unlikely(ipv6_optlen(new) > optlen)) { kfree(new); return -EINVAL; } } opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, new); kfree(new); if (IS_ERR(opt)) return PTR_ERR(opt); /* routing header option needs extra check */ err = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) goto sticky_done; break; #endif case IPV6_SRCRT_TYPE_4: { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; if (!seg6_validate_srh(srh, optlen, false)) goto sticky_done; break; } default: goto sticky_done; } } err = 0; opt = ipv6_update_options(sk, opt); sticky_done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } return err; } int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int retv = -ENOPROTOOPT; int val, valbool; if (sockptr_is_null(optval)) val = 0; else { if (optlen >= sizeof(int)) { if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; } else val = 0; } valbool = (val != 0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); /* Handle options that can be set without locking the socket. */ switch (optname) { case IPV6_UNICAST_HOPS: if (optlen < sizeof(int)) return -EINVAL; if (val > 255 || val < -1) return -EINVAL; WRITE_ONCE(np->hop_limit, val); return 0; case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) return -EINVAL; if (val != valbool) return -EINVAL; inet6_assign_bit(MC6_LOOP, sk, valbool); return 0; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) return retv; if (optlen < sizeof(int)) return -EINVAL; if (val > 255 || val < -1) return -EINVAL; WRITE_ONCE(np->mcast_hops, val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); return 0; case IPV6_MTU: if (optlen < sizeof(int)) return -EINVAL; if (val && val < IPV6_MIN_MTU) return -EINVAL; WRITE_ONCE(np->frag_size, val); return 0; case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) return -EINVAL; if (val < 0 || val > 255) return -EINVAL; if (val) static_branch_enable(&ip6_min_hopcount); /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount * while we are changing it. */ WRITE_ONCE(np->min_hopcount, val); return 0; case IPV6_RECVERR_RFC4884: if (optlen < sizeof(int)) return -EINVAL; if (val < 0 || val > 1) return -EINVAL; inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); return 0; case IPV6_MULTICAST_ALL: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(MC6_ALL, sk, valbool); return 0; case IPV6_AUTOFLOWLABEL: inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); inet6_set_bit(AUTOFLOWLABEL_SET, sk); return 0; case IPV6_DONTFRAG: inet6_assign_bit(DONTFRAG, sk, valbool); return 0; case IPV6_RECVERR: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(RECVERR6, sk, valbool); if (!val) skb_errqueue_purge(&sk->sk_error_queue); return 0; case IPV6_ROUTER_ALERT_ISOLATE: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); return 0; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) return -EINVAL; if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) return -EINVAL; WRITE_ONCE(np->pmtudisc, val); return 0; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) return -EINVAL; inet6_assign_bit(SNDFLOW, sk, valbool); return 0; case IPV6_ADDR_PREFERENCES: if (optlen < sizeof(int)) return -EINVAL; return ip6_sock_set_addr_preferences(sk, val); case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) return -ENOPROTOOPT; if (optlen < sizeof(int)) return -EINVAL; if (val) { struct net_device *dev; int bound_dev_if, midx; rcu_read_lock(); dev = dev_get_by_index_rcu(net, val); if (!dev) { rcu_read_unlock(); return -ENODEV; } midx = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (bound_dev_if && bound_dev_if != val && (!midx || midx != bound_dev_if)) return -EINVAL; } WRITE_ONCE(np->mcast_oif, val); return 0; case IPV6_UNICAST_IF: { struct net_device *dev; int ifindex; if (optlen != sizeof(int)) return -EINVAL; ifindex = (__force int)ntohl((__force __be32)val); if (!ifindex) { WRITE_ONCE(np->ucast_oif, 0); return 0; } dev = dev_get_by_index(net, ifindex); if (!dev) return -EADDRNOTAVAIL; dev_put(dev); if (READ_ONCE(sk->sk_bound_dev_if)) return -EINVAL; WRITE_ONCE(np->ucast_oif, ifindex); return 0; } } sockopt_lock_sock(sk); /* Another thread has converted the socket into IPv4 with * IPV6_ADDRFORM concurrently. */ if (unlikely(sk->sk_family != AF_INET6)) goto unlock; switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { if (sk->sk_type == SOCK_RAW) break; if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET6) { retv = -EBUSY; break; } } else if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_prot != &tcpv6_prot) { retv = -EBUSY; break; } } else { break; } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; } if (ipv6_only_sock(sk) || !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ WRITE_ONCE(sk->sk_prot, &tcp_prot); /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops); WRITE_ONCE(sk->sk_family, PF_INET); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct proto *prot = &udp_prot; if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ WRITE_ONCE(sk->sk_prot, prot); WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops); WRITE_ONCE(sk->sk_family, PF_INET); } /* Disable all options not to allocate memory anymore, * but there is still a race. See the lockless path * in udpv6_sendmsg() and ipv6_local_rxpmtu(). */ np->rxopt.all = 0; inet6_cleanup_sock(sk); module_put(THIS_MODULE); retv = 0; break; } goto e_inval; case IPV6_V6ONLY: if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; sk->sk_ipv6only = valbool; retv = 0; break; case IPV6_RECVPKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxinfo = valbool; retv = 0; break; case IPV6_2292PKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxoinfo = valbool; retv = 0; break; case IPV6_RECVHOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxhlim = valbool; retv = 0; break; case IPV6_2292HOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxohlim = valbool; retv = 0; break; case IPV6_RECVRTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.osrcrt = valbool; retv = 0; break; case IPV6_RECVHOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.hopopts = valbool; retv = 0; break; case IPV6_2292HOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.ohopopts = valbool; retv = 0; break; case IPV6_RECVDSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.dstopts = valbool; retv = 0; break; case IPV6_2292DSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.odstopts = valbool; retv = 0; break; case IPV6_TCLASS: if (optlen < sizeof(int)) goto e_inval; if (val < -1 || val > 0xff) goto e_inval; /* RFC 3542, 6.5: default traffic class of 0x0 */ if (val == -1) val = 0; if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; val |= np->tclass & INET_ECN_MASK; } if (np->tclass != val) { np->tclass = val; sk_dst_reset(sk); } retv = 0; break; case IPV6_RECVTCLASS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxtclass = valbool; retv = 0; break; case IPV6_FLOWINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxflow = valbool; retv = 0; break; case IPV6_RECVPATHMTU: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxpmtu = valbool; retv = 0; break; case IPV6_TRANSPARENT: if (valbool && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW) && !sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN)) { retv = -EPERM; break; } if (optlen < sizeof(int)) goto e_inval; /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ inet_assign_bit(TRANSPARENT, sk, valbool); retv = 0; break; case IPV6_FREEBIND: if (optlen < sizeof(int)) goto e_inval; /* we also don't have a separate freebind bit for IPV6 */ inet_assign_bit(FREEBIND, sk, valbool); retv = 0; break; case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxorigdstaddr = valbool; retv = 0; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: retv = ipv6_set_opt_hdr(sk, optname, optval, optlen); break; case IPV6_PKTINFO: { struct in6_pktinfo pkt; if (optlen == 0) goto e_inval; else if (optlen < sizeof(struct in6_pktinfo) || sockptr_is_null(optval)) goto e_inval; if (copy_from_sockptr(&pkt, optval, sizeof(pkt))) { retv = -EFAULT; break; } if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex)) goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; retv = 0; break; } case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; struct ipcm6_cookie ipc6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; /* 1K is probably excessive * 1K is surely not enough, 2K per standard header is 16K. */ retv = -EINVAL; if (optlen > 64*1024) break; opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; if (!opt) break; memset(opt, 0, sizeof(*opt)); refcount_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_sockptr(opt + 1, optval, optlen)) goto done; msg.msg_controllen = optlen; msg.msg_control_is_user = false; msg.msg_control = (void *)(opt+1); ipc6.opt = opt; retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6); if (retv) goto done; update: retv = 0; opt = ipv6_update_options(sk, opt); done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } break; } case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EPROTO; if (inet_test_bit(IS_ICSK, sk)) break; retv = -EFAULT; if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_ADD_MEMBERSHIP) retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: case IPV6_LEAVE_ANYCAST: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EFAULT; if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_JOIN_ANYCAST) retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); else retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) retv = compat_ipv6_mcast_join_leave(sk, optname, optval, optlen); else retv = ipv6_mcast_join_leave(sk, optname, optval, optlen); break; case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: retv = do_ipv6_mcast_group_source(sk, optname, optval, optlen); break; case MCAST_MSFILTER: if (in_compat_syscall()) retv = compat_ipv6_set_mcast_msfilter(sk, optval, optlen); else retv = ipv6_set_mcast_msfilter(sk, optval, optlen); break; case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); if (retv == 0) inet6_assign_bit(RTALERT, sk, valbool); break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; if (!sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN)) break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; } unlock: sockopt_release_sock(sk); return retv; e_inval: retv = -EINVAL; goto unlock; } int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { int err; if (level == SOL_IP && sk->sk_type != SOCK_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && optname != IPV6_XFRM_POLICY) err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } EXPORT_SYMBOL(ipv6_setsockopt); static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, int optname, sockptr_t optval, int len) { struct ipv6_opt_hdr *hdr; if (!opt) return 0; switch (optname) { case IPV6_HOPOPTS: hdr = opt->hopopt; break; case IPV6_RTHDRDSTOPTS: hdr = opt->dst0opt; break; case IPV6_RTHDR: hdr = (struct ipv6_opt_hdr *)opt->srcrt; break; case IPV6_DSTOPTS: hdr = opt->dst1opt; break; default: return -EINVAL; /* should not happen */ } if (!hdr) return 0; len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_sockptr(optval, hdr, len)) return -EFAULT; return len; } static int ipv6_get_msfilter(struct sock *sk, sockptr_t optval, sockptr_t optlen, int len) { const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter gsf; int num; int err; if (len < size0) return -EINVAL; if (copy_from_sockptr(&gsf, optval, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; num = gsf.gf_numsrc; sockopt_lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, optval, size0); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; len = GROUP_FILTER_SIZE(num); if (copy_to_sockptr(optlen, &len, sizeof(int)) || copy_to_sockptr(optval, &gsf, size0)) err = -EFAULT; } sockopt_release_sock(sk); return err; } static int compat_ipv6_get_msfilter(struct sock *sk, sockptr_t optval, sockptr_t optlen, int len) { const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter gf32; struct group_filter gf; int err; int num; if (len < size0) return -EINVAL; if (copy_from_sockptr(&gf32, optval, size0)) return -EFAULT; gf.gf_interface = gf32.gf_interface; gf.gf_fmode = gf32.gf_fmode; num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; if (gf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; sockopt_lock_sock(sk); err = ip6_mc_msfget(sk, &gf, optval, size0); sockopt_release_sock(sk); if (err) return err; if (num > gf.gf_numsrc) num = gf.gf_numsrc; len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); if (copy_to_sockptr(optlen, &len, sizeof(int)) || copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode), &gf.gf_fmode, sizeof(gf32.gf_fmode)) || copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc), &gf.gf_numsrc, sizeof(gf32.gf_numsrc))) return -EFAULT; return 0; } int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen) { struct ipv6_pinfo *np = inet6_sk(sk); int len; int val; if (ip6_mroute_opt(optname)) return ip6_mroute_getsockopt(sk, optname, optval, optlen); if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; break; case MCAST_MSFILTER: if (in_compat_syscall()) return compat_ipv6_get_msfilter(sk, optval, optlen, len); return ipv6_get_msfilter(sk, optval, optlen, len); case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; if (optval.is_kernel) { msg.msg_control_is_user = false; msg.msg_control = optval.kernel; } else { msg.msg_control_is_user = true; msg.msg_control_user = optval.user; } msg.msg_controllen = len; msg.msg_flags = 0; sockopt_lock_sock(sk); skb = np->pktoptions; if (skb) ip6_datagram_recv_ctl(sk, &msg, skb); sockopt_release_sock(sk); if (!skb) { if (np->rxopt.bits.rxinfo) { int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { int hlim = READ_ONCE(np->mcast_hops); put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { int tclass = (int)ip6_tclass(np->rcv_flowinfo); put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { int hlim = READ_ONCE(np->mcast_hops); put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { __be32 flowinfo = np->rcv_flowinfo; put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } } len -= msg.msg_controllen; return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_MTU: { struct dst_entry *dst; val = 0; rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = dst_mtu(dst); rcu_read_unlock(); if (!val) return -ENOTCONN; break; } case IPV6_V6ONLY: val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; case IPV6_2292PKTINFO: val = np->rxopt.bits.rxoinfo; break; case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; case IPV6_2292HOPLIMIT: val = np->rxopt.bits.rxohlim; break; case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; case IPV6_2292RTHDR: val = np->rxopt.bits.osrcrt; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; sockopt_lock_sock(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); sockopt_release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) return len; return copy_to_sockptr(optlen, &len, sizeof(int)); } case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; case IPV6_2292HOPOPTS: val = np->rxopt.bits.ohopopts; break; case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; case IPV6_2292DSTOPTS: val = np->rxopt.bits.odstopts; break; case IPV6_TCLASS: val = np->tclass; break; case IPV6_RECVTCLASS: val = np->rxopt.bits.rxtclass; break; case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; case IPV6_RECVPATHMTU: val = np->rxopt.bits.rxpmtu; break; case IPV6_PATHMTU: { struct dst_entry *dst; struct ip6_mtuinfo mtuinfo; if (len < sizeof(mtuinfo)) return -EINVAL; len = sizeof(mtuinfo); memset(&mtuinfo, 0, sizeof(mtuinfo)); rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) mtuinfo.ip6m_mtu = dst_mtu(dst); rcu_read_unlock(); if (!mtuinfo.ip6m_mtu) return -ENOTCONN; if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &mtuinfo, len)) return -EFAULT; return 0; } case IPV6_TRANSPARENT: val = inet_test_bit(TRANSPARENT, sk); break; case IPV6_FREEBIND: val = inet_test_bit(FREEBIND, sk); break; case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; case IPV6_UNICAST_HOPS: case IPV6_MULTICAST_HOPS: { struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) val = READ_ONCE(np->hop_limit); else val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); dst = __sk_dst_get(sk); if (dst) val = ip6_dst_hoplimit(dst); rcu_read_unlock(); } if (val < 0) val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } case IPV6_MULTICAST_LOOP: val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: val = READ_ONCE(np->mcast_oif); break; case IPV6_MULTICAST_ALL: val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif)); break; case IPV6_MTU_DISCOVER: val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: { struct in6_flowlabel_req freq; int flags; if (len < sizeof(freq)) return -EINVAL; if (copy_from_sockptr(&freq, optval, sizeof(freq))) return -EFAULT; if (freq.flr_action != IPV6_FL_A_GET) return -EINVAL; len = sizeof(freq); flags = freq.flr_flags; memset(&freq, 0, sizeof(freq)); val = ipv6_flowlabel_opt_get(sk, &freq, flags); if (val < 0) return val; if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &freq, len)) return -EFAULT; return 0; } case IPV6_ADDR_PREFERENCES: { u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? */ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; } case IPV6_MINHOPCOUNT: val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: val = np->rxopt.bits.recvfragsize; break; case IPV6_ROUTER_ALERT: val = inet6_test_bit(RTALERT, sk); break; case IPV6_ROUTER_ALERT_ISOLATE: val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, len)) return -EFAULT; return 0; } int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { int err; if (level == SOL_IP && sk->sk_type != SOCK_RAW) return ip_getsockopt(sk, level, optname, optval, optlen); if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { int len; if (get_user(len, optlen)) return -EFAULT; err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } #endif return err; } EXPORT_SYMBOL(ipv6_getsockopt); |
| 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/pnode.h * * (C) Copyright IBM Corporation 2005. */ #ifndef _LINUX_PNODE_H #define _LINUX_PNODE_H #include <linux/list.h> #include "mount.h" #define IS_MNT_SHARED(m) ((m)->mnt_t_flags & T_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) #define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt_t_flags &= ~T_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt_t_flags & T_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt_t_flags & T_MARKED) #define SET_MNT_MARK(m) ((m)->mnt_t_flags |= T_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt_t_flags &= ~T_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 #define CL_COPY_UNBINDABLE 0x04 #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_COPY_MNT_NS_FILE 0x40 /* * EXCL[namespace_sem] */ static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt_t_flags &= ~T_SHARED_MASK; mnt->mnt_t_flags |= T_SHARED; } static inline bool peers(const struct mount *m1, const struct mount *m2) { return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; } void change_mnt_propagation(struct mount *, int); void bulk_make_private(struct list_head *); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, struct hlist_head *); void propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); int mnt_get_count(struct mount *mnt); void mnt_set_mountpoint(struct mount *, struct mountpoint *, struct mount *); void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt); struct mount *copy_tree(struct mount *, struct dentry *, int); bool is_path_reachable(struct mount *, struct dentry *, const struct path *root); int count_mounts(struct mnt_namespace *ns, struct mount *mnt); bool propagation_would_overmount(const struct mount *from, const struct mount *to, const struct mountpoint *mp); #endif /* _LINUX_PNODE_H */ |
| 6 6 10 10 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Public Key Encryption * * Copyright (c) 2015, Intel Corporation * Authors: Tadeusz Struk <tadeusz.struk@intel.com> */ #include <crypto/internal/akcipher.h> #include <linux/cryptouser.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <net/netlink.h> #include "internal.h" #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e struct crypto_akcipher_sync_data { struct crypto_akcipher *tfm; const void *src; void *dst; unsigned int slen; unsigned int dlen; struct akcipher_request *req; struct crypto_wait cwait; struct scatterlist sg; u8 *buf; }; static int __maybe_unused crypto_akcipher_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; memset(&rakcipher, 0, sizeof(rakcipher)); strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(rakcipher), &rakcipher); } static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) { seq_puts(m, "type : akcipher\n"); } static void crypto_akcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_akcipher *akcipher = __crypto_akcipher_tfm(tfm); struct akcipher_alg *alg = crypto_akcipher_alg(akcipher); alg->exit(akcipher); } static int crypto_akcipher_init_tfm(struct crypto_tfm *tfm) { struct crypto_akcipher *akcipher = __crypto_akcipher_tfm(tfm); struct akcipher_alg *alg = crypto_akcipher_alg(akcipher); if (alg->exit) akcipher->base.exit = crypto_akcipher_exit_tfm; if (alg->init) return alg->init(akcipher); return 0; } static void crypto_akcipher_free_instance(struct crypto_instance *inst) { struct akcipher_instance *akcipher = akcipher_instance(inst); akcipher->free(akcipher); } static const struct crypto_type crypto_akcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_akcipher_init_tfm, .free = crypto_akcipher_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_akcipher_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_akcipher_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, .type = CRYPTO_ALG_TYPE_AKCIPHER, .tfmsize = offsetof(struct crypto_akcipher, base), .algsize = offsetof(struct akcipher_alg, base), }; int crypto_grab_akcipher(struct crypto_akcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_akcipher_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_akcipher); struct crypto_akcipher *crypto_alloc_akcipher(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_akcipher_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_akcipher); static void akcipher_prepare_alg(struct akcipher_alg *alg) { struct crypto_alg *base = &alg->base; base->cra_type = &crypto_akcipher_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AKCIPHER; } static int akcipher_default_op(struct akcipher_request *req) { return -ENOSYS; } static int akcipher_default_set_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { return -ENOSYS; } int crypto_register_akcipher(struct akcipher_alg *alg) { struct crypto_alg *base = &alg->base; if (!alg->encrypt) alg->encrypt = akcipher_default_op; if (!alg->decrypt) alg->decrypt = akcipher_default_op; if (!alg->set_priv_key) alg->set_priv_key = akcipher_default_set_key; akcipher_prepare_alg(alg); return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_akcipher); void crypto_unregister_akcipher(struct akcipher_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_akcipher); int akcipher_register_instance(struct crypto_template *tmpl, struct akcipher_instance *inst) { if (WARN_ON(!inst->free)) return -EINVAL; akcipher_prepare_alg(&inst->alg); return crypto_register_instance(tmpl, akcipher_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(akcipher_register_instance); static int crypto_akcipher_sync_prep(struct crypto_akcipher_sync_data *data) { unsigned int reqsize = crypto_akcipher_reqsize(data->tfm); struct akcipher_request *req; struct scatterlist *sg; unsigned int mlen; unsigned int len; u8 *buf; mlen = max(data->slen, data->dlen); len = sizeof(*req) + reqsize + mlen; if (len < mlen) return -EOVERFLOW; req = kzalloc(len, GFP_KERNEL); if (!req) return -ENOMEM; data->req = req; akcipher_request_set_tfm(req, data->tfm); buf = (u8 *)(req + 1) + reqsize; data->buf = buf; memcpy(buf, data->src, data->slen); sg = &data->sg; sg_init_one(sg, buf, mlen); akcipher_request_set_crypt(req, sg, sg, data->slen, data->dlen); crypto_init_wait(&data->cwait); akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &data->cwait); return 0; } static int crypto_akcipher_sync_post(struct crypto_akcipher_sync_data *data, int err) { err = crypto_wait_req(err, &data->cwait); memcpy(data->dst, data->buf, data->dlen); data->dlen = data->req->dst_len; kfree_sensitive(data->req); return err; } int crypto_akcipher_sync_encrypt(struct crypto_akcipher *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { struct crypto_akcipher_sync_data data = { .tfm = tfm, .src = src, .dst = dst, .slen = slen, .dlen = dlen, }; return crypto_akcipher_sync_prep(&data) ?: crypto_akcipher_sync_post(&data, crypto_akcipher_encrypt(data.req)); } EXPORT_SYMBOL_GPL(crypto_akcipher_sync_encrypt); int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, const void *src, unsigned int slen, void *dst, unsigned int dlen) { struct crypto_akcipher_sync_data data = { .tfm = tfm, .src = src, .dst = dst, .slen = slen, .dlen = dlen, }; return crypto_akcipher_sync_prep(&data) ?: crypto_akcipher_sync_post(&data, crypto_akcipher_decrypt(data.req)) ?: data.dlen; } EXPORT_SYMBOL_GPL(crypto_akcipher_sync_decrypt); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic public key cipher type"); |
| 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 | // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/bvec.h> #include <linux/crc32c.h> #include <linux/net.h> #include <linux/socket.h> #include <net/sock.h> #include <linux/ceph/ceph_features.h> #include <linux/ceph/decode.h> #include <linux/ceph/libceph.h> #include <linux/ceph/messenger.h> /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; /* * If @buf is NULL, discard up to @len bytes. */ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) { struct kvec iov = {buf, len}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (!buf) msg.msg_flags |= MSG_TRUNC; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { struct bio_vec bvec; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; BUG_ON(page_offset + length > PAGE_SIZE); bvec_set_page(&bvec, page, length, page_offset); iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } /* * write something. @more is true if caller will be sending more data * shortly. */ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, size_t kvlen, size_t len, bool more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (more) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ r = kernel_sendmsg(sock, &msg, iov, kvlen, len); if (r == -EAGAIN) r = 0; return r; } /* * @more: MSG_MORE or 0. */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, }; struct bio_vec bvec; int ret; /* * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, * we need to fall back to sendmsg if that's the case. * * Same goes for slab pages: skb_can_coalesce() allows * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ if (sendpage_ok(page)) msg.msg_flags |= MSG_SPLICE_PAGES; bvec_set_page(&bvec, page, size, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; return ret; } static void con_out_kvec_reset(struct ceph_connection *con) { BUG_ON(con->v1.out_skip); con->v1.out_kvec_left = 0; con->v1.out_kvec_bytes = 0; con->v1.out_kvec_cur = &con->v1.out_kvec[0]; } static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { int index = con->v1.out_kvec_left; BUG_ON(con->v1.out_skip); BUG_ON(index >= ARRAY_SIZE(con->v1.out_kvec)); con->v1.out_kvec[index].iov_len = size; con->v1.out_kvec[index].iov_base = data; con->v1.out_kvec_left++; con->v1.out_kvec_bytes += size; } /* * Chop off a kvec from the end. Return residual number of bytes for * that kvec, i.e. how many bytes would have been written if the kvec * hadn't been nuked. */ static int con_out_kvec_skip(struct ceph_connection *con) { int skip = 0; if (con->v1.out_kvec_bytes > 0) { skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len; BUG_ON(con->v1.out_kvec_bytes < skip); BUG_ON(!con->v1.out_kvec_left); con->v1.out_kvec_bytes -= skip; con->v1.out_kvec_left--; } return skip; } static size_t sizeof_footer(struct ceph_connection *con) { return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? sizeof(struct ceph_msg_footer) : sizeof(struct ceph_msg_footer_old); } static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ u64 len = msg->sparse_read_total ? : data_len; ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ static void prepare_write_message_footer(struct ceph_connection *con, struct ceph_msg *m) { m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); con_out_kvec_add(con, sizeof_footer(con), &m->footer); if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) con->ops->sign_message(m); else m->footer.sig = 0; } else { m->old_footer.flags = m->footer.flags; } con->v1.out_more = m->more_to_follow; con->v1.out_msg_done = true; } /* * Prepare headers for the next outgoing message. */ static void prepare_write_message(struct ceph_connection *con, struct ceph_msg *m) { u32 crc; con_out_kvec_reset(con); con->v1.out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); } dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), m->data_length); WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); m->hdr.crc = cpu_to_le32(crc); memcpy(&con->v1.out_hdr, &m->hdr, sizeof(con->v1.out_hdr)); /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); m->footer.front_crc = cpu_to_le32(crc); if (m->middle) { crc = crc32c(0, m->middle->vec.iov_base, m->middle->vec.iov_len); m->footer.middle_crc = cpu_to_le32(crc); } else m->footer.middle_crc = 0; dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(m->footer.front_crc), le32_to_cpu(m->footer.middle_crc)); m->footer.flags = 0; /* is there a data payload? */ m->footer.data_crc = 0; if (m->data_length) { prepare_message_data(m, m->data_length); con->v1.out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ prepare_write_message_footer(con, m); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare an ack. */ static void prepare_write_ack(struct ceph_connection *con) { dout("prepare_write_ack %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); con->v1.out_more = 1; /* more will follow.. eventually.. */ ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to share the seq during handshake */ static void prepare_write_seq(struct ceph_connection *con) { dout("prepare_write_seq %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to write keepalive byte. */ static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { struct timespec64 now; ktime_get_real_ts64(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2), &con->v1.out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Connection negotiation. */ static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; int auth_proto; if (!con->ops->get_authorizer) { con->v1.auth = NULL; con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->v1.out_connect.authorizer_len = 0; return 0; } auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry); if (IS_ERR(auth)) return PTR_ERR(auth); con->v1.auth = auth; con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto); con->v1.out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); return 0; } /* * We connected to a peer and are saying hello. */ static void prepare_write_banner(struct ceph_connection *con) { con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), &con->msgr->my_enc_addr); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static void __prepare_write_connect(struct ceph_connection *con) { con_out_kvec_add(con, sizeof(con->v1.out_connect), &con->v1.out_connect); if (con->v1.auth) con_out_kvec_add(con, con->v1.auth->authorizer_buf_len, con->v1.auth->authorizer_buf); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = ceph_get_global_seq(con->msgr, 0); int proto; int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: proto = CEPH_MONC_PROTOCOL; break; case CEPH_ENTITY_TYPE_OSD: proto = CEPH_OSDC_PROTOCOL; break; case CEPH_ENTITY_TYPE_MDS: proto = CEPH_MDSC_PROTOCOL; break; default: BUG(); } dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->v1.connect_seq, global_seq, proto); con->v1.out_connect.features = cpu_to_le64(from_msgr(con->msgr)->supported_features); con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq); con->v1.out_connect.global_seq = cpu_to_le32(global_seq); con->v1.out_connect.protocol_version = cpu_to_le32(proto); con->v1.out_connect.flags = 0; ret = get_connect_authorizer(con); if (ret) return ret; __prepare_write_connect(con); return 0; } /* * write as much of pending kvecs to the socket as we can. * 1 -> done * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_kvec(struct ceph_connection *con) { int ret; dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes); while (con->v1.out_kvec_bytes > 0) { ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur, con->v1.out_kvec_left, con->v1.out_kvec_bytes, con->v1.out_more); if (ret <= 0) goto out; con->v1.out_kvec_bytes -= ret; if (!con->v1.out_kvec_bytes) break; /* done */ /* account for full iov entries consumed */ while (ret >= con->v1.out_kvec_cur->iov_len) { BUG_ON(!con->v1.out_kvec_left); ret -= con->v1.out_kvec_cur->iov_len; con->v1.out_kvec_cur++; con->v1.out_kvec_left--; } /* and for a partially-consumed entry */ if (ret) { con->v1.out_kvec_cur->iov_len -= ret; con->v1.out_kvec_cur->iov_base += ret; } } con->v1.out_kvec_left = 0; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret); return ret; /* done! */ } /* * Write as much message data payload as we can. If we finish, queue * up the footer. * 1 -> done, footer is now queued in out_kvec[]. * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_message_data(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; dout("%s %p msg %p\n", __func__, con, msg); if (!msg->num_data_items) return -EINVAL; /* * Iterate through each page that contains data to be * written, and send as much as possible for each. * * If we are calculating the data crc (the default), we will * need to map the page. If we have no pages, they have * been revoked, so use the zero page. */ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; while (cursor->total_resid) { struct page *page; size_t page_offset; size_t length; int ret; if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, MSG_MORE); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); return ret; } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); /* prepare and queue up footer, too */ if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); prepare_write_message_footer(con, msg); return 1; /* must return > 0 to indicate success */ } /* * write some zeros */ static int write_partial_skip(struct ceph_connection *con) { int ret; dout("%s %p %d left\n", __func__, con, con->v1.out_skip); while (con->v1.out_skip > 0) { size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, MSG_MORE); if (ret <= 0) goto out; con->v1.out_skip -= ret; } ret = 1; out: return ret; } /* * Prepare to read connection handshake, or an ack. */ static void prepare_read_banner(struct ceph_connection *con) { dout("prepare_read_banner %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_connect(struct ceph_connection *con) { dout("prepare_read_connect %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_seq(struct ceph_connection *con) { dout("prepare_read_seq %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_SEQ; } static void prepare_read_tag(struct ceph_connection *con) { dout("prepare_read_tag %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_READY; } static void prepare_read_keepalive_ack(struct ceph_connection *con) { dout("prepare_read_keepalive_ack %p\n", con); con->v1.in_base_pos = 0; } /* * Prepare to read a message. */ static int prepare_read_message(struct ceph_connection *con) { dout("prepare_read_message %p\n", con); BUG_ON(con->in_msg != NULL); con->v1.in_base_pos = 0; con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; return 0; } static int read_partial(struct ceph_connection *con, int end, int size, void *object) { while (con->v1.in_base_pos < end) { int left = end - con->v1.in_base_pos; int have = size - left; int ret = ceph_tcp_recvmsg(con->sock, object + have, left); if (ret <= 0) return ret; con->v1.in_base_pos += ret; } return 1; } /* * Read all or part of the connect-side handshake on a new connection */ static int read_partial_banner(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos); /* peer's banner */ size = strlen(CEPH_BANNER); end = size; ret = read_partial(con, end, size, con->v1.in_banner); if (ret <= 0) goto out; size = sizeof(con->v1.actual_peer_addr); end += size; ret = read_partial(con, end, size, &con->v1.actual_peer_addr); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.actual_peer_addr); size = sizeof(con->v1.peer_addr_for_me); end += size; ret = read_partial(con, end, size, &con->v1.peer_addr_for_me); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.peer_addr_for_me); out: return ret; } static int read_partial_connect(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos); size = sizeof(con->v1.in_reply); end = size; ret = read_partial(con, end, size, &con->v1.in_reply); if (ret <= 0) goto out; if (con->v1.auth) { size = le32_to_cpu(con->v1.in_reply.authorizer_len); if (size > con->v1.auth->authorizer_reply_buf_len) { pr_err("authorizer reply too big: %d > %zu\n", size, con->v1.auth->authorizer_reply_buf_len); ret = -EINVAL; goto out; } end += size; ret = read_partial(con, end, size, con->v1.auth->authorizer_reply_buf); if (ret <= 0) goto out; } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, con->v1.in_reply.tag, le32_to_cpu(con->v1.in_reply.connect_seq), le32_to_cpu(con->v1.in_reply.global_seq)); out: return ret; } /* * Verify the hello banner looks okay. */ static int verify_hello(struct ceph_connection *con) { if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", ceph_pr_addr(&con->peer_addr)); con->error_msg = "protocol error, bad banner"; return -1; } return 0; } static int process_banner(struct ceph_connection *con) { struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; dout("process_banner on %p\n", con); if (verify_hello(con) < 0) return -1; /* * Make sure the other end is who we wanted. note that the other * end may not yet know their ip address, so if it's 0.0.0.0, give * them the benefit of the doubt. */ if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr, sizeof(con->peer_addr)) != 0 && !(ceph_addr_is_blank(&con->v1.actual_peer_addr) && con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->v1.actual_peer_addr), le32_to_cpu(con->v1.actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } /* * did we learn our address? */ if (ceph_addr_is_blank(my_addr)) { memcpy(&my_addr->in_addr, &con->v1.peer_addr_for_me.in_addr, sizeof(con->v1.peer_addr_for_me.in_addr)); ceph_addr_set_port(my_addr, 0); ceph_encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", ceph_pr_addr(my_addr)); } return 0; } static int process_connect(struct ceph_connection *con) { u64 sup_feat = from_msgr(con->msgr)->supported_features; u64 req_feat = from_msgr(con->msgr)->required_features; u64 server_feat = le64_to_cpu(con->v1.in_reply.features); int ret; dout("process_connect on %p tag %d\n", con, con->v1.in_tag); if (con->v1.auth) { int len = le32_to_cpu(con->v1.in_reply.authorizer_len); /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and * ->verify_authorizer_reply(). * * See get_connect_authorizer(). */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( con, con->v1.auth->authorizer_reply_buf, len); if (ret < 0) return ret; con_out_kvec_reset(con); __prepare_write_connect(con); prepare_read_connect(con); return 0; } if (len) { ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; return ret; } } } switch (con->v1.in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; return -1; case CEPH_MSGR_TAG_BADPROTOVER: pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->v1.out_connect.protocol_version), le32_to_cpu(con->v1.in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: con->v1.auth_retry++; dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, con->v1.auth_retry); if (con->v1.auth_retry == 2) { con->error_msg = "connect authorization failure"; return -1; } con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: /* * If we connected with a large connect_seq but the peer * has no record of a session with us (no connection, or * connect_seq == 0), they will send RESETSESION to indicate * that they must have reset their session, and may have * dropped messages. */ dout("process_connect got RESET peer seq %u\n", le32_to_cpu(con->v1.in_reply.connect_seq)); pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr)); ceph_con_reset_session(con); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); /* Tell ceph about it. */ mutex_unlock(&con->mutex); if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); if (con->state != CEPH_CON_S_V1_CONNECT_MSG) return -EAGAIN; break; case CEPH_MSGR_TAG_RETRY_SESSION: /* * If we sent a smaller connect_seq than the peer has, try * again with a larger value. */ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->v1.out_connect.connect_seq), le32_to_cpu(con->v1.in_reply.connect_seq)); con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RETRY_GLOBAL: /* * If we sent a smaller global_seq than the peer has, try * again with a larger value. */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.global_seq)); ceph_get_global_seq(con->msgr, le32_to_cpu(con->v1.in_reply.global_seq)); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_SEQ: case CEPH_MSGR_TAG_READY: if (req_feat & ~server_feat) { pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; return -1; } WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG); con->state = CEPH_CON_S_OPEN; con->v1.auth_retry = 0; /* we authenticated; clear flag */ con->v1.peer_global_seq = le32_to_cpu(con->v1.in_reply.global_seq); con->v1.connect_seq++; con->peer_features = server_feat; dout("process_connect got READY gseq %d cseq %d (%d)\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.connect_seq), con->v1.connect_seq); WARN_ON(con->v1.connect_seq != le32_to_cpu(con->v1.in_reply.connect_seq)); if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY) ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); con->delay = 0; /* reset backoff memory */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) { prepare_write_seq(con); prepare_read_seq(con); } else { prepare_read_tag(con); } break; case CEPH_MSGR_TAG_WAIT: /* * If there is a connection race (we are opening * connections to each other), one of us may just have * to WAIT. This shouldn't happen if we are the * client. */ con->error_msg = "protocol error, got WAIT as client"; return -1; default: con->error_msg = "protocol error, garbage tag during connect"; return -1; } return 0; } /* * read (part of) an ack */ static int read_partial_ack(struct ceph_connection *con) { int size = sizeof(con->v1.in_temp_ack); int end = size; return read_partial(con, end, size, &con->v1.in_temp_ack); } /* * We can finally discard anything that's been acked. */ static void process_ack(struct ceph_connection *con) { u64 ack = le64_to_cpu(con->v1.in_temp_ack); if (con->v1.in_tag == CEPH_MSGR_TAG_ACK) ceph_con_discard_sent(con, ack); else ceph_con_discard_requeued(con, ack); prepare_read_tag(con); } static int read_partial_message_chunk(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { int ret, left; BUG_ON(!section); while (section->iov_len < sec_len) { BUG_ON(section->iov_base == NULL); left = sec_len - section->iov_len; ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + section->iov_len, left); if (ret <= 0) return ret; section->iov_len += ret; } if (section->iov_len == sec_len) *crc = crc32c(*crc, section->iov_base, section->iov_len); return 1; } static inline int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { *crc = 0; return read_partial_message_chunk(con, section, sec_len, crc); } static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); if (do_bounce && unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } while (cursor->sr_resid > 0) { struct page *page, *rpage; size_t off, len; int ret; page = ceph_msg_data_next(cursor, &off, &len); rpage = do_bounce ? con->bounce_page : page; /* clamp to what remains in extent */ len = min_t(int, len, cursor->sr_resid); ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); if (ret <= 0) return ret; *crc = ceph_crc32c_page(*crc, rpage, off, ret); ceph_msg_data_advance(cursor, (size_t)ret); cursor->sr_resid -= ret; if (do_bounce) memcpy_page(page, off, rpage, off, ret); } return 1; } static int read_partial_sparse_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc = 0; int ret = 1; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, con->v1.in_sr_len, &crc); else if (cursor->sr_resid > 0) ret = read_partial_sparse_msg_extent(con, &crc); if (ret <= 0) break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); if (ret <= 0) { ret = ret ? ret : 1; /* must return > 0 to indicate success */ break; } con->v1.in_sr_len = ret; } if (do_datacrc) con->in_data_crc = crc; return ret; } static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; size_t length; u32 crc = 0; int ret; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { if (do_datacrc) con->in_data_crc = crc; return ret; } if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } static int read_partial_msg_data_bounce(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; struct page *page; size_t off, len; u32 crc; int ret; if (unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &off, &len); ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); if (ret <= 0) { con->in_data_crc = crc; return ret; } crc = crc32c(crc, page_address(con->bounce_page), ret); memcpy_to_page(page, off, page_address(con->bounce_page), ret); ceph_msg_data_advance(cursor, ret); } con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } /* * read (part of) a message. */ static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; int size; int end; int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; dout("read_partial_message con %p msg %p\n", con, m); /* header */ size = sizeof(con->v1.in_hdr); end = size; ret = read_partial(con, end, size, &con->v1.in_hdr); if (ret <= 0) return ret; crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc)); if (cpu_to_le32(crc) != con->v1.in_hdr.crc) { pr_err("read_partial_message bad hdr crc %u != expected %u\n", crc, con->v1.in_hdr.crc); return -EBADMSG; } front_len = le32_to_cpu(con->v1.in_hdr.front_len); if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) return -EIO; data_len = le32_to_cpu(con->v1.in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) return -EIO; /* verify seq# */ seq = le64_to_cpu(con->v1.in_hdr.seq); if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), seq, con->in_seq + 1); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); con->error_msg = "bad message sequence # for incoming message"; return -EBADE; } /* allocate message? */ if (!con->in_msg) { int skip = 0; dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type, front_len, data_len); ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip); if (ret < 0) return ret; BUG_ON((!con->in_msg) ^ skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 1; } BUG_ON(!con->in_msg); BUG_ON(con->in_msg->con != con); m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ if (m->middle) m->middle->vec.iov_len = 0; /* prepare for data payload, if any */ if (data_len) prepare_message_data(con->in_msg, data_len); } /* front */ ret = read_partial_message_section(con, &m->front, front_len, &con->in_front_crc); if (ret <= 0) return ret; /* middle */ if (m->middle) { ret = read_partial_message_section(con, &m->middle->vec, middle_len, &con->in_middle_crc); if (ret <= 0) return ret; } /* (page) data */ if (data_len) { if (!m->num_data_items) return -EIO; if (m->sparse_read_total) ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else ret = read_partial_msg_data(con); if (ret <= 0) return ret; } /* footer */ size = sizeof_footer(con); end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; if (!need_sign) { m->footer.flags = m->old_footer.flags; m->footer.sig = 0; } dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); /* crc ok? */ if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { pr_err("read_partial_message %p front crc %u != exp. %u\n", m, con->in_front_crc, m->footer.front_crc); return -EBADMSG; } if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { pr_err("read_partial_message %p middle crc %u != exp %u\n", m, con->in_middle_crc, m->footer.middle_crc); return -EBADMSG; } if (do_datacrc && (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { pr_err("read_partial_message %p data crc %u != exp. %u\n", m, con->in_data_crc, le32_to_cpu(m->footer.data_crc)); return -EBADMSG; } if (need_sign && con->ops->check_message_signature && con->ops->check_message_signature(m)) { pr_err("read_partial_message %p signature check failed\n", m); return -EBADMSG; } return 1; /* done! */ } static int read_keepalive_ack(struct ceph_connection *con) { struct ceph_timespec ceph_ts; size_t size = sizeof(ceph_ts); int ret = read_partial(con, size, size, &ceph_ts); if (ret <= 0) return ret; ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); prepare_read_tag(con); return 1; } /* * Read what we can from the socket. */ int ceph_con_v1_try_read(struct ceph_connection *con) { int ret = -1; more: dout("try_read start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; BUG_ON(!con->sock); dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag, con->v1.in_base_pos); if (con->state == CEPH_CON_S_V1_BANNER) { ret = read_partial_banner(con); if (ret <= 0) goto out; ret = process_banner(con); if (ret < 0) goto out; con->state = CEPH_CON_S_V1_CONNECT_MSG; /* * Received banner is good, exchange connection info. * Do not reset out_kvec, as sending our banner raced * with receiving peer banner after connect completed. */ ret = prepare_write_connect(con); if (ret < 0) goto out; prepare_read_connect(con); /* Send connection info before awaiting response */ goto out; } if (con->state == CEPH_CON_S_V1_CONNECT_MSG) { ret = read_partial_connect(con); if (ret <= 0) goto out; ret = process_connect(con); if (ret < 0) goto out; goto more; } WARN_ON(con->state != CEPH_CON_S_OPEN); if (con->v1.in_base_pos < 0) { /* * skipping + discarding content. */ ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos); if (ret <= 0) goto out; dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos); con->v1.in_base_pos += ret; if (con->v1.in_base_pos) goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) { /* * what's next? */ ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1); if (ret <= 0) goto out; dout("try_read got tag %d\n", con->v1.in_tag); switch (con->v1.in_tag) { case CEPH_MSGR_TAG_MSG: prepare_read_message(con); break; case CEPH_MSGR_TAG_ACK: prepare_read_ack(con); break; case CEPH_MSGR_TAG_KEEPALIVE2_ACK: prepare_read_keepalive_ack(con); break; case CEPH_MSGR_TAG_CLOSE: ceph_con_close_socket(con); con->state = CEPH_CON_S_CLOSED; goto out; default: goto bad_tag; } } if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) { ret = read_partial_message(con); if (ret <= 0) { switch (ret) { case -EBADMSG: con->error_msg = "bad crc/signature"; fallthrough; case -EBADE: ret = -EIO; break; case -EIO: con->error_msg = "io error"; break; } goto out; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) goto more; ceph_con_process_message(con); if (con->state == CEPH_CON_S_OPEN) prepare_read_tag(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_ACK || con->v1.in_tag == CEPH_MSGR_TAG_SEQ) { /* * the final handshake seq exchange is semantically * equivalent to an ACK */ ret = read_partial_ack(con); if (ret <= 0) goto out; process_ack(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { ret = read_keepalive_ack(con); if (ret <= 0) goto out; goto more; } out: dout("try_read done on %p ret %d\n", con, ret); return ret; bad_tag: pr_err("try_read bad tag %d\n", con->v1.in_tag); con->error_msg = "protocol error, garbage tag"; ret = -1; goto out; } /* * Write something to the socket. Called in a worker thread when the * socket appears to be writeable and we have something ready to send. */ int ceph_con_v1_try_write(struct ceph_connection *con) { struct ceph_msg *msg; int ret = 1; dout("try_write start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_PREOPEN && con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; /* open the socket first? */ if (con->state == CEPH_CON_S_PREOPEN) { BUG_ON(con->sock); con->state = CEPH_CON_S_V1_BANNER; con_out_kvec_reset(con); prepare_write_banner(con); prepare_read_banner(con); BUG_ON(con->in_msg); con->v1.in_tag = CEPH_MSGR_TAG_READY; dout("try_write initiating connect on %p new state %d\n", con, con->state); ret = ceph_tcp_connect(con); if (ret < 0) { con->error_msg = "connect error"; goto out; } } more: dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes); BUG_ON(!con->sock); /* kvec data queued? */ if (con->v1.out_kvec_left) { ret = write_partial_kvec(con); if (ret <= 0) goto out; } if (con->v1.out_skip) { ret = write_partial_skip(con); if (ret <= 0) goto out; } /* msg pages? */ msg = con->out_msg; if (msg) { if (con->v1.out_msg_done) { ceph_msg_put(msg); con->out_msg = NULL; /* we're done with this one */ goto do_next; } ret = write_partial_message_data(con, msg); if (ret == 1) goto more; /* we need to send the footer, too! */ if (ret == 0) goto out; if (ret < 0) { dout("try_write write_partial_message_data err %d\n", ret); goto out; } } do_next: if (con->state == CEPH_CON_S_OPEN) { if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } /* is anything else pending? */ if ((msg = ceph_con_get_out_msg(con)) != NULL) { prepare_write_message(con, msg); goto more; } if (con->in_seq > con->in_seq_acked) { prepare_write_ack(con); goto more; } } /* Nothing to do! */ ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: dout("try_write done on %p ret %d\n", con, ret); return ret; } void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg) { WARN_ON(con->v1.out_skip); /* footer */ if (con->v1.out_msg_done) { con->v1.out_skip += con_out_kvec_skip(con); } else { WARN_ON(!msg->data_length); con->v1.out_skip += sizeof_footer(con); } /* data, middle, front */ if (msg->data_length) con->v1.out_skip += msg->cursor.total_resid; if (msg->middle) con->v1.out_skip += con_out_kvec_skip(con); con->v1.out_skip += con_out_kvec_skip(con); dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con, con->v1.out_kvec_bytes, con->v1.out_skip); } void ceph_con_v1_revoke_incoming(struct ceph_connection *con) { unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len); unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len); /* skip rest of message */ con->v1.in_base_pos = con->v1.in_base_pos - sizeof(struct ceph_msg_header) - front_len - middle_len - data_len - sizeof(struct ceph_msg_footer); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos); } bool ceph_con_v1_opened(struct ceph_connection *con) { return con->v1.connect_seq; } void ceph_con_v1_reset_session(struct ceph_connection *con) { con->v1.connect_seq = 0; con->v1.peer_global_seq = 0; } void ceph_con_v1_reset_protocol(struct ceph_connection *con) { con->v1.out_skip = 0; } |
| 4299 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Global definitions for the Ethernet IEEE 802.3 interface. * * Version: @(#)if_ether.h 1.0.1a 02/08/94 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk> */ #ifndef _LINUX_IF_ETHER_H #define _LINUX_IF_ETHER_H #include <linux/skbuff.h> #include <uapi/linux/if_ether.h> /* XX:XX:XX:XX:XX:XX */ #define MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1) static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_mac_header(skb); } /* Prefer this version in TX path, instead of * skb_reset_mac_header() + eth_hdr() */ static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb->data; } static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_inner_mac_header(skb); } int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); #endif /* _LINUX_IF_ETHER_H */ |
| 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Holtek On Line Grip based gamepads * * These include at least a Brazilian "Clone Joypad Super Power Fire" * which uses vendor ID 0x1241 and identifies as "HOLTEK On Line Grip". * * Copyright (c) 2011 Anssi Hannula <anssi.hannula@iki.fi> */ /* */ #include <linux/hid.h> #include <linux/input.h> #include <linux/module.h> #include <linux/slab.h> #include "hid-ids.h" #ifdef CONFIG_HOLTEK_FF /* * These commands and parameters are currently known: * * byte 0: command id: * 01 set effect parameters * 02 play specified effect * 03 stop specified effect * 04 stop all effects * 06 stop all effects * (the difference between 04 and 06 isn't known; win driver * sends 06,04 on application init, and 06 otherwise) * * Commands 01 and 02 need to be sent as pairs, i.e. you need to send 01 * before each 02. * * The rest of the bytes are parameters. Command 01 takes all of them, and * commands 02,03 take only the effect id. * * byte 1: * bits 0-3: effect id: * 1: very strong rumble * 2: periodic rumble, short intervals * 3: very strong rumble * 4: periodic rumble, long intervals * 5: weak periodic rumble, long intervals * 6: weak periodic rumble, short intervals * 7: periodic rumble, short intervals * 8: strong periodic rumble, short intervals * 9: very strong rumble * a: causes an error * b: very strong periodic rumble, very short intervals * c-f: nothing * bit 6: right (weak) motor enabled * bit 7: left (strong) motor enabled * * bytes 2-3: time in milliseconds, big-endian * bytes 5-6: unknown (win driver seems to use at least 10e0 with effect 1 * and 0014 with effect 6) * byte 7: * bits 0-3: effect magnitude */ #define HOLTEKFF_MSG_LENGTH 7 static const u8 start_effect_1[] = { 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const u8 stop_all4[] = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const u8 stop_all6[] = { 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; struct holtekff_device { struct hid_field *field; }; static void holtekff_send(struct holtekff_device *holtekff, struct hid_device *hid, const u8 data[HOLTEKFF_MSG_LENGTH]) { int i; for (i = 0; i < HOLTEKFF_MSG_LENGTH; i++) { holtekff->field->value[i] = data[i]; } dbg_hid("sending %7ph\n", data); hid_hw_request(hid, holtekff->field->report, HID_REQ_SET_REPORT); } static int holtekff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct holtekff_device *holtekff = data; int left, right; /* effect type 1, length 65535 msec */ u8 buf[HOLTEKFF_MSG_LENGTH] = { 0x01, 0x01, 0xff, 0xff, 0x10, 0xe0, 0x00 }; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; dbg_hid("called with 0x%04x 0x%04x\n", left, right); if (!left && !right) { holtekff_send(holtekff, hid, stop_all6); return 0; } if (left) buf[1] |= 0x80; if (right) buf[1] |= 0x40; /* The device takes a single magnitude, so we just sum them up. */ buf[6] = min(0xf, (left >> 12) + (right >> 12)); holtekff_send(holtekff, hid, buf); holtekff_send(holtekff, hid, start_effect_1); return 0; } static int holtekff_init(struct hid_device *hid) { struct holtekff_device *holtekff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output report found\n"); return -ENODEV; } report = list_entry(report_list->next, struct hid_report, list); if (report->maxfield < 1 || report->field[0]->report_count != 7) { hid_err(hid, "unexpected output report layout\n"); return -ENODEV; } holtekff = kzalloc(sizeof(*holtekff), GFP_KERNEL); if (!holtekff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); holtekff->field = report->field[0]; /* initialize the same way as win driver does */ holtekff_send(holtekff, hid, stop_all4); holtekff_send(holtekff, hid, stop_all6); error = input_ff_create_memless(dev, holtekff, holtekff_play); if (error) { kfree(holtekff); return error; } hid_info(hid, "Force feedback for Holtek On Line Grip based devices by Anssi Hannula <anssi.hannula@iki.fi>\n"); return 0; } #else static inline int holtekff_init(struct hid_device *hid) { return 0; } #endif static int holtek_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } holtekff_init(hdev); return 0; err: return ret; } static const struct hid_device_id holtek_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) }, { } }; MODULE_DEVICE_TABLE(hid, holtek_devices); static struct hid_driver holtek_driver = { .name = "holtek", .id_table = holtek_devices, .probe = holtek_probe, }; module_hid_driver(holtek_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>"); MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); |
| 1 1 7 1 1 1 1 1 1 1 1 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | // SPDX-License-Identifier: GPL-2.0+ /* * mpc624.c * Hardware driver for a Micro/sys inc. MPC-624 PC/104 board * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2000 David A. Schleef <ds@schleef.org> */ /* * Driver: mpc624 * Description: Micro/sys MPC-624 PC/104 board * Devices: [Micro/sys] MPC-624 (mpc624) * Author: Stanislaw Raczynski <sraczynski@op.pl> * Updated: Thu, 15 Sep 2005 12:01:18 +0200 * Status: working * * The Micro/sys MPC-624 board is based on the LTC2440 24-bit sigma-delta * ADC chip. * * Subdevices supported by the driver: * - Analog In: supported * - Digital I/O: not supported * - LEDs: not supported * - EEPROM: not supported * * Configuration Options: * [0] - I/O base address * [1] - conversion rate * Conversion rate RMS noise Effective Number Of Bits * 0 3.52kHz 23uV 17 * 1 1.76kHz 3.5uV 20 * 2 880Hz 2uV 21.3 * 3 440Hz 1.4uV 21.8 * 4 220Hz 1uV 22.4 * 5 110Hz 750uV 22.9 * 6 55Hz 510nV 23.4 * 7 27.5Hz 375nV 24 * 8 13.75Hz 250nV 24.4 * 9 6.875Hz 200nV 24.6 * [2] - voltage range * 0 -1.01V .. +1.01V * 1 -10.1V .. +10.1V */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/delay.h> /* Offsets of different ports */ #define MPC624_MASTER_CONTROL 0 /* not used */ #define MPC624_GNMUXCH 1 /* Gain, Mux, Channel of ADC */ #define MPC624_ADC 2 /* read/write to/from ADC */ #define MPC624_EE 3 /* read/write to/from serial EEPROM via I2C */ #define MPC624_LEDS 4 /* write to LEDs */ #define MPC624_DIO 5 /* read/write to/from digital I/O ports */ #define MPC624_IRQ_MASK 6 /* IRQ masking enable/disable */ /* Register bits' names */ #define MPC624_ADBUSY BIT(5) #define MPC624_ADSDO BIT(4) #define MPC624_ADFO BIT(3) #define MPC624_ADCS BIT(2) #define MPC624_ADSCK BIT(1) #define MPC624_ADSDI BIT(0) /* 32-bit output value bits' names */ #define MPC624_EOC_BIT BIT(31) #define MPC624_DMY_BIT BIT(30) #define MPC624_SGN_BIT BIT(29) /* SDI Speed/Resolution Programming bits */ #define MPC624_OSR(x) (((x) & 0x1f) << 27) #define MPC624_SPEED_3_52_KHZ MPC624_OSR(0x11) #define MPC624_SPEED_1_76_KHZ MPC624_OSR(0x12) #define MPC624_SPEED_880_HZ MPC624_OSR(0x13) #define MPC624_SPEED_440_HZ MPC624_OSR(0x14) #define MPC624_SPEED_220_HZ MPC624_OSR(0x15) #define MPC624_SPEED_110_HZ MPC624_OSR(0x16) #define MPC624_SPEED_55_HZ MPC624_OSR(0x17) #define MPC624_SPEED_27_5_HZ MPC624_OSR(0x18) #define MPC624_SPEED_13_75_HZ MPC624_OSR(0x19) #define MPC624_SPEED_6_875_HZ MPC624_OSR(0x1f) struct mpc624_private { unsigned int ai_speed; }; /* -------------------------------------------------------------------------- */ static const struct comedi_lrange range_mpc624_bipolar1 = { 1, { /* BIP_RANGE(1.01) this is correct, */ /* but my MPC-624 actually seems to have a range of 2.02 */ BIP_RANGE(2.02) } }; static const struct comedi_lrange range_mpc624_bipolar10 = { 1, { /* BIP_RANGE(10.1) this is correct, */ /* but my MPC-624 actually seems to have a range of 20.2 */ BIP_RANGE(20.2) } }; static unsigned int mpc624_ai_get_sample(struct comedi_device *dev, struct comedi_subdevice *s) { struct mpc624_private *devpriv = dev->private; unsigned int data_out = devpriv->ai_speed; unsigned int data_in = 0; unsigned int bit; int i; /* Start reading data */ udelay(1); for (i = 0; i < 32; i++) { /* Set the clock low */ outb(0, dev->iobase + MPC624_ADC); udelay(1); /* Set the ADSDI line for the next bit (send to MPC624) */ bit = (data_out & BIT(31)) ? MPC624_ADSDI : 0; outb(bit, dev->iobase + MPC624_ADC); udelay(1); /* Set the clock high */ outb(MPC624_ADSCK | bit, dev->iobase + MPC624_ADC); udelay(1); /* Read ADSDO on high clock (receive from MPC624) */ data_in <<= 1; data_in |= (inb(dev->iobase + MPC624_ADC) & MPC624_ADSDO) >> 4; udelay(1); data_out <<= 1; } /* * Received 32-bit long value consist of: * 31: EOC - (End Of Transmission) bit - should be 0 * 30: DMY - (Dummy) bit - should be 0 * 29: SIG - (Sign) bit - 1 if positive, 0 if negative * 28: MSB - (Most Significant Bit) - the first bit of the * conversion result * .... * 05: LSB - (Least Significant Bit)- the last bit of the * conversion result * 04-00: sub-LSB - sub-LSBs are basically noise, but when * averaged properly, they can increase * conversion precision up to 29 bits; * they can be discarded without loss of * resolution. */ if (data_in & MPC624_EOC_BIT) dev_dbg(dev->class_dev, "EOC bit is set!"); if (data_in & MPC624_DMY_BIT) dev_dbg(dev->class_dev, "DMY bit is set!"); if (data_in & MPC624_SGN_BIT) { /* * Voltage is positive * * comedi operates on unsigned numbers, so mask off EOC * and DMY and don't clear the SGN bit */ data_in &= 0x3fffffff; } else { /* * The voltage is negative * * data_in contains a number in 30-bit two's complement * code and we must deal with it */ data_in |= MPC624_SGN_BIT; data_in = ~data_in; data_in += 1; /* clear EOC and DMY bits */ data_in &= ~(MPC624_EOC_BIT | MPC624_DMY_BIT); data_in = 0x20000000 - data_in; } return data_in; } static int mpc624_ai_eoc(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned long context) { unsigned char status; status = inb(dev->iobase + MPC624_ADC); if ((status & MPC624_ADBUSY) == 0) return 0; return -EBUSY; } static int mpc624_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; int i; /* * WARNING: * We always write 0 to GNSWA bit, so the channel range is +-/10.1Vdc */ outb(insn->chanspec, dev->iobase + MPC624_GNMUXCH); for (i = 0; i < insn->n; i++) { /* Trigger the conversion */ outb(MPC624_ADSCK, dev->iobase + MPC624_ADC); udelay(1); outb(MPC624_ADCS | MPC624_ADSCK, dev->iobase + MPC624_ADC); udelay(1); outb(0, dev->iobase + MPC624_ADC); udelay(1); /* Wait for the conversion to end */ ret = comedi_timeout(dev, s, insn, mpc624_ai_eoc, 0); if (ret) return ret; data[i] = mpc624_ai_get_sample(dev, s); } return insn->n; } static int mpc624_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct mpc624_private *devpriv; struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 0x10); if (ret) return ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; switch (it->options[1]) { case 0: devpriv->ai_speed = MPC624_SPEED_3_52_KHZ; break; case 1: devpriv->ai_speed = MPC624_SPEED_1_76_KHZ; break; case 2: devpriv->ai_speed = MPC624_SPEED_880_HZ; break; case 3: devpriv->ai_speed = MPC624_SPEED_440_HZ; break; case 4: devpriv->ai_speed = MPC624_SPEED_220_HZ; break; case 5: devpriv->ai_speed = MPC624_SPEED_110_HZ; break; case 6: devpriv->ai_speed = MPC624_SPEED_55_HZ; break; case 7: devpriv->ai_speed = MPC624_SPEED_27_5_HZ; break; case 8: devpriv->ai_speed = MPC624_SPEED_13_75_HZ; break; case 9: devpriv->ai_speed = MPC624_SPEED_6_875_HZ; break; default: devpriv->ai_speed = MPC624_SPEED_3_52_KHZ; } ret = comedi_alloc_subdevices(dev, 1); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_DIFF; s->n_chan = 4; s->maxdata = 0x3fffffff; s->range_table = (it->options[1] == 0) ? &range_mpc624_bipolar1 : &range_mpc624_bipolar10; s->insn_read = mpc624_ai_insn_read; return 0; } static struct comedi_driver mpc624_driver = { .driver_name = "mpc624", .module = THIS_MODULE, .attach = mpc624_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(mpc624_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for Micro/sys MPC-624 PC/104 board"); MODULE_LICENSE("GPL"); |
| 34 34 17 17 10 9 10 9 1 5 27 27 2 2 2 3 2 16 27 4 4 4 4 4 3 3 3 3 11 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 3 4 1 3 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 | // SPDX-License-Identifier: GPL-2.0-or-later /* * PPP synchronous tty channel driver for Linux. * * This is a ppp channel driver that can be used with tty device drivers * that are frame oriented, such as synchronous HDLC devices. * * Complete PPP frames without encoding/decoding are exchanged between * the channel driver and the device driver. * * The async map IOCTL codes are implemented to keep the user mode * applications happy if they call them. Synchronous PPP does not use * the async maps. * * Copyright 1999 Paul Mackerras. * * Also touched by the grubby hands of Paul Fulghum paulkf@microgate.com * * This driver provides the encapsulation and framing for sending * and receiving PPP frames over sync serial lines. It relies on * the generic PPP layer to give it frames to send and to process * received frames. It implements the PPP line discipline. * * Part of the code in this driver was inspired by the old async-only * PPP driver, written by Michael Callahan and Al Longyear, and * subsequently hacked by Paul Mackerras. * * ==FILEVERSION 20040616== */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/netdevice.h> #include <linux/poll.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/ppp_channel.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/refcount.h> #include <linux/unaligned.h> #include <linux/uaccess.h> #define PPP_VERSION "2.4.2" /* Structure for storing local state. */ struct syncppp { struct tty_struct *tty; unsigned int flags; unsigned int rbits; int mru; spinlock_t xmit_lock; spinlock_t recv_lock; unsigned long xmit_flags; u32 xaccm[8]; u32 raccm; unsigned int bytes_sent; unsigned int bytes_rcvd; struct sk_buff *tpkt; unsigned long last_xmit; struct sk_buff_head rqueue; struct tasklet_struct tsk; refcount_t refcnt; struct completion dead_cmp; struct ppp_channel chan; /* interface to generic ppp layer */ }; /* Bit numbers in xmit_flags */ #define XMIT_WAKEUP 0 #define XMIT_FULL 1 /* Bits in rbits */ #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) #define PPPSYNC_MAX_RQLEN 32 /* arbitrary */ /* * Prototypes. */ static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *); static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb); static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg); static void ppp_sync_process(struct tasklet_struct *t); static int ppp_sync_push(struct syncppp *ap); static void ppp_sync_flush_output(struct syncppp *ap); static void ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count); static const struct ppp_channel_ops sync_ops = { .start_xmit = ppp_sync_send, .ioctl = ppp_sync_ioctl, }; /* * Utility procedure to print a buffer in hex/ascii */ static void ppp_print_buffer (const char *name, const __u8 *buf, int count) { if (name != NULL) printk(KERN_DEBUG "ppp_synctty: %s, count = %d\n", name, count); print_hex_dump_bytes("", DUMP_PREFIX_NONE, buf, count); } /* * Routines implementing the synchronous PPP line discipline. */ /* * We have a potential race on dereferencing tty->disc_data, * because the tty layer provides no locking at all - thus one * cpu could be running ppp_synctty_receive while another * calls ppp_synctty_close, which zeroes tty->disc_data and * frees the memory that ppp_synctty_receive is using. The best * way to fix this is to use a rwlock in the tty struct, but for now * we use a single global rwlock for all ttys in ppp line discipline. * * FIXME: Fixed in tty_io nowadays. */ static DEFINE_RWLOCK(disc_data_lock); static struct syncppp *sp_get(struct tty_struct *tty) { struct syncppp *ap; read_lock(&disc_data_lock); ap = tty->disc_data; if (ap != NULL) refcount_inc(&ap->refcnt); read_unlock(&disc_data_lock); return ap; } static void sp_put(struct syncppp *ap) { if (refcount_dec_and_test(&ap->refcnt)) complete(&ap->dead_cmp); } /* * Called when a tty is put into sync-PPP line discipline. */ static int ppp_sync_open(struct tty_struct *tty) { struct syncppp *ap; int err; int speed; if (tty->ops->write == NULL) return -EOPNOTSUPP; ap = kzalloc(sizeof(*ap), GFP_KERNEL); err = -ENOMEM; if (!ap) goto out; /* initialize the syncppp structure */ ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); spin_lock_init(&ap->recv_lock); ap->xaccm[0] = ~0U; ap->xaccm[3] = 0x60000000U; ap->raccm = ~0U; skb_queue_head_init(&ap->rqueue); tasklet_setup(&ap->tsk, ppp_sync_process); refcount_set(&ap->refcnt, 1); init_completion(&ap->dead_cmp); ap->chan.private = ap; ap->chan.ops = &sync_ops; ap->chan.mtu = PPP_MRU; ap->chan.hdrlen = 2; /* for A/C bytes */ speed = tty_get_baud_rate(tty); ap->chan.speed = speed; err = ppp_register_channel(&ap->chan); if (err) goto out_free; tty->disc_data = ap; tty->receive_room = 65536; return 0; out_free: kfree(ap); out: return err; } /* * Called when the tty is put into another line discipline * or it hangs up. We have to wait for any cpu currently * executing in any of the other ppp_synctty_* routines to * finish before we can call ppp_unregister_channel and free * the syncppp struct. This routine must be called from * process context, not interrupt or softirq context. */ static void ppp_sync_close(struct tty_struct *tty) { struct syncppp *ap; write_lock_irq(&disc_data_lock); ap = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ap) return; /* * We have now ensured that nobody can start using ap from now * on, but we have to wait for all existing users to finish. * Note that ppp_unregister_channel ensures that no calls to * our channel ops (i.e. ppp_sync_send/ioctl) are in progress * by the time it returns. */ if (!refcount_dec_and_test(&ap->refcnt)) wait_for_completion(&ap->dead_cmp); tasklet_kill(&ap->tsk); ppp_unregister_channel(&ap->chan); skb_queue_purge(&ap->rqueue); kfree_skb(ap->tpkt); kfree(ap); } /* * Called on tty hangup in process context. * * Wait for I/O to driver to complete and unregister PPP channel. * This is already done by the close routine, so just call that. */ static void ppp_sync_hangup(struct tty_struct *tty) { ppp_sync_close(tty); } /* * Read does nothing - no data is ever available this way. * Pppd reads and writes packets via /dev/ppp instead. */ static ssize_t ppp_sync_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t count, void **cookie, unsigned long offset) { return -EAGAIN; } /* * Write on the tty does nothing, the packets all come in * from the ppp generic stuff. */ static ssize_t ppp_sync_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t count) { return -EAGAIN; } static int ppp_synctty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct syncppp *ap = sp_get(tty); int __user *p = (int __user *)arg; int err, val; if (!ap) return -ENXIO; err = -EFAULT; switch (cmd) { case PPPIOCGCHAN: err = -EFAULT; if (put_user(ppp_channel_index(&ap->chan), p)) break; err = 0; break; case PPPIOCGUNIT: err = -EFAULT; if (put_user(ppp_unit_number(&ap->chan), p)) break; err = 0; break; case TCFLSH: /* flush our buffers and the serial port's buffer */ if (arg == TCIOFLUSH || arg == TCOFLUSH) ppp_sync_flush_output(ap); err = n_tty_ioctl_helper(tty, cmd, arg); break; case FIONREAD: val = 0; if (put_user(val, p)) break; err = 0; break; default: err = tty_mode_ioctl(tty, cmd, arg); break; } sp_put(ap); return err; } /* May sleep, don't call from interrupt level or with interrupts disabled */ static void ppp_sync_receive(struct tty_struct *tty, const u8 *buf, const u8 *cflags, size_t count) { struct syncppp *ap = sp_get(tty); unsigned long flags; if (!ap) return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); if (!skb_queue_empty(&ap->rqueue)) tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); } static void ppp_sync_wakeup(struct tty_struct *tty) { struct syncppp *ap = sp_get(tty); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (!ap) return; set_bit(XMIT_WAKEUP, &ap->xmit_flags); tasklet_schedule(&ap->tsk); sp_put(ap); } static struct tty_ldisc_ops ppp_sync_ldisc = { .owner = THIS_MODULE, .num = N_SYNC_PPP, .name = "pppsync", .open = ppp_sync_open, .close = ppp_sync_close, .hangup = ppp_sync_hangup, .read = ppp_sync_read, .write = ppp_sync_write, .ioctl = ppp_synctty_ioctl, .receive_buf = ppp_sync_receive, .write_wakeup = ppp_sync_wakeup, }; static int __init ppp_sync_init(void) { int err; err = tty_register_ldisc(&ppp_sync_ldisc); if (err != 0) printk(KERN_ERR "PPP_sync: error %d registering line disc.\n", err); return err; } /* * The following routines provide the PPP channel interface. */ static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) { struct syncppp *ap = chan->private; int err, val; u32 accm[8]; void __user *argp = (void __user *)arg; u32 __user *p = argp; err = -EFAULT; switch (cmd) { case PPPIOCGFLAGS: val = ap->flags | ap->rbits; if (put_user(val, (int __user *) argp)) break; err = 0; break; case PPPIOCSFLAGS: if (get_user(val, (int __user *) argp)) break; ap->flags = val & ~SC_RCV_BITS; spin_lock_irq(&ap->recv_lock); ap->rbits = val & SC_RCV_BITS; spin_unlock_irq(&ap->recv_lock); err = 0; break; case PPPIOCGASYNCMAP: if (put_user(ap->xaccm[0], p)) break; err = 0; break; case PPPIOCSASYNCMAP: if (get_user(ap->xaccm[0], p)) break; err = 0; break; case PPPIOCGRASYNCMAP: if (put_user(ap->raccm, p)) break; err = 0; break; case PPPIOCSRASYNCMAP: if (get_user(ap->raccm, p)) break; err = 0; break; case PPPIOCGXASYNCMAP: if (copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm))) break; err = 0; break; case PPPIOCSXASYNCMAP: if (copy_from_user(accm, argp, sizeof(accm))) break; accm[2] &= ~0x40000000U; /* can't escape 0x5e */ accm[3] |= 0x60000000U; /* must escape 0x7d, 0x7e */ memcpy(ap->xaccm, accm, sizeof(ap->xaccm)); err = 0; break; case PPPIOCGMRU: if (put_user(ap->mru, (int __user *) argp)) break; err = 0; break; case PPPIOCSMRU: if (get_user(val, (int __user *) argp)) break; if (val > U16_MAX) { err = -EINVAL; break; } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; err = 0; break; default: err = -ENOTTY; } return err; } /* * This is called at softirq level to deliver received packets * to the ppp_generic code, and to tell the ppp_generic code * if we can accept more output now. */ static void ppp_sync_process(struct tasklet_struct *t) { struct syncppp *ap = from_tasklet(ap, t, tsk); struct sk_buff *skb; /* process received packets */ while ((skb = skb_dequeue(&ap->rqueue)) != NULL) { if (skb->len == 0) { /* zero length buffers indicate error */ ppp_input_error(&ap->chan, 0); kfree_skb(skb); } else ppp_input(&ap->chan, skb); } /* try to push more stuff out */ if (test_bit(XMIT_WAKEUP, &ap->xmit_flags) && ppp_sync_push(ap)) ppp_output_wakeup(&ap->chan); } /* * Procedures for encapsulation and framing. */ static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb) { int proto; unsigned char *data; int islcp; /* Ensure we can safely access protocol field and LCP code */ if (!pskb_may_pull(skb, 3)) { kfree_skb(skb); return NULL; } data = skb->data; proto = get_unaligned_be16(data); /* LCP packets with codes between 1 (configure-request) * and 7 (code-reject) must be sent as though no options * have been negotiated. */ islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7; /* compress protocol field if option enabled */ if (data[0] == 0 && (ap->flags & SC_COMP_PROT) && !islcp) skb_pull(skb,1); /* prepend address/control fields if necessary */ if ((ap->flags & SC_COMP_AC) == 0 || islcp) { if (skb_headroom(skb) < 2) { struct sk_buff *npkt = dev_alloc_skb(skb->len + 2); if (npkt == NULL) { kfree_skb(skb); return NULL; } skb_reserve(npkt,2); skb_copy_from_linear_data(skb, skb_put(npkt, skb->len), skb->len); consume_skb(skb); skb = npkt; } skb_push(skb,2); skb->data[0] = PPP_ALLSTATIONS; skb->data[1] = PPP_UI; } ap->last_xmit = jiffies; if (skb && ap->flags & SC_LOG_OUTPKT) ppp_print_buffer ("send buffer", skb->data, skb->len); return skb; } /* * Transmit-side routines. */ /* * Send a packet to the peer over an sync tty line. * Returns 1 iff the packet was accepted. * If the packet was not accepted, we will call ppp_output_wakeup * at some later time. */ static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb) { struct syncppp *ap = chan->private; ppp_sync_push(ap); if (test_and_set_bit(XMIT_FULL, &ap->xmit_flags)) return 0; /* already full */ skb = ppp_sync_txmunge(ap, skb); if (skb != NULL) ap->tpkt = skb; else clear_bit(XMIT_FULL, &ap->xmit_flags); ppp_sync_push(ap); return 1; } /* * Push as much data as possible out to the tty. */ static int ppp_sync_push(struct syncppp *ap) { int sent, done = 0; struct tty_struct *tty = ap->tty; int tty_stuffed = 0; if (!spin_trylock_bh(&ap->xmit_lock)) return 0; for (;;) { if (test_and_clear_bit(XMIT_WAKEUP, &ap->xmit_flags)) tty_stuffed = 0; if (!tty_stuffed && ap->tpkt) { set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); sent = tty->ops->write(tty, ap->tpkt->data, ap->tpkt->len); if (sent < 0) goto flush; /* error, e.g. loss of CD */ if (sent < ap->tpkt->len) { tty_stuffed = 1; } else { consume_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } continue; } /* haven't made any progress */ spin_unlock_bh(&ap->xmit_lock); if (!(test_bit(XMIT_WAKEUP, &ap->xmit_flags) || (!tty_stuffed && ap->tpkt))) break; if (!spin_trylock_bh(&ap->xmit_lock)) break; } return done; flush: if (ap->tpkt) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } spin_unlock_bh(&ap->xmit_lock); return done; } /* * Flush output from our internal buffers. * Called for the TCFLSH ioctl. */ static void ppp_sync_flush_output(struct syncppp *ap) { int done = 0; spin_lock_bh(&ap->xmit_lock); if (ap->tpkt != NULL) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } spin_unlock_bh(&ap->xmit_lock); if (done) ppp_output_wakeup(&ap->chan); } /* * Receive-side routines. */ /* called when the tty driver has data for us. * * Data is frame oriented: each call to ppp_sync_input is considered * a whole frame. If the 1st flag byte is non-zero then the whole * frame is considered to be in error and is tossed. */ static void ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count) { struct sk_buff *skb; unsigned char *p; if (count == 0) return; if (ap->flags & SC_LOG_INPKT) ppp_print_buffer ("receive buffer", buf, count); /* stuff the chars in the skb */ skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) { printk(KERN_ERR "PPPsync: no memory (input pkt)\n"); goto err; } /* Try to get the payload 4-byte aligned */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); if (flags && *flags) { /* error flag set, ignore frame */ goto err; } else if (count > skb_tailroom(skb)) { /* packet overflowed MRU */ goto err; } skb_put_data(skb, buf, count); /* strip address/control field if present */ p = skb->data; if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { /* chop off address/control */ if (skb->len < 3) goto err; p = skb_pull(skb, 2); } /* PPP packet length should be >= 2 bytes when protocol field is not * compressed. */ if (!(p[0] & 0x01) && skb->len < 2) goto err; /* queue the frame to be processed */ skb_queue_tail(&ap->rqueue, skb); return; err: /* queue zero length packet as error indication */ if (skb || (skb = dev_alloc_skb(0))) { skb_trim(skb, 0); skb_queue_tail(&ap->rqueue, skb); } } static void __exit ppp_sync_cleanup(void) { tty_unregister_ldisc(&ppp_sync_ldisc); } module_init(ppp_sync_init); module_exit(ppp_sync_cleanup); MODULE_DESCRIPTION("PPP synchronous TTY channel module"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_SYNC_PPP); |
| 3241 2651 2866 529 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _NET_RPS_H #define _NET_RPS_H #include <linux/types.h> #include <linux/static_key.h> #include <net/sock.h> #include <net/hotdata.h> #ifdef CONFIG_RPS extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; /* * This structure holds an RPS map which can be of variable length. The * map is an array of CPUs. */ struct rps_map { unsigned int len; struct rcu_head rcu; u16 cpus[]; }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) /* * The rps_dev_flow structure contains the mapping of a flow to a CPU, the * tail pointer for that CPU's input queue at the time of last enqueue, a * hardware filter index, and the hash of the flow if aRFS is enabled. */ struct rps_dev_flow { u16 cpu; u16 filter; unsigned int last_qtail; #ifdef CONFIG_RFS_ACCEL u32 hash; #endif }; #define RPS_NO_FILTER 0xffff /* * The rps_dev_flow_table structure contains a table of flow mappings. */ struct rps_dev_flow_table { u8 log; struct rcu_head rcu; struct rps_dev_flow flows[]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ ((_num) * sizeof(struct rps_dev_flow))) /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). * Each entry is a 32bit value. Upper part is the high-order bits * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { struct rcu_head rcu; u32 mask; u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { unsigned int index = hash & table->mask; u32 val = hash & ~net_hotdata.rps_cpu_mask; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in get_rps_cpu(). */ if (READ_ONCE(table->ents[index]) != val) WRITE_ONCE(table->ents[index], val); } static inline void _sock_rps_record_flow_hash(__u32 hash) { struct rps_sock_flow_table *sock_flow_table; if (!hash) return; rcu_read_lock(); sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); } static inline void _sock_rps_record_flow(const struct sock *sk) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. * * TCP_ESTABLISHED does cover almost all states where RFS * might be useful, and is cheaper [1] than testing : * IPv4: inet_sk(sk)->inet_daddr * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) * OR an additional socket flag * [1] : sk_state and sk_prot are in the same cache line. */ if (sk->sk_state == TCP_ESTABLISHED) { /* This READ_ONCE() is paired with the WRITE_ONCE() * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). */ _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); } } static inline void _sock_rps_delete_flow(const struct sock *sk) { struct rps_sock_flow_table *table; u32 hash, index; hash = READ_ONCE(sk->sk_rxhash); if (!hash) return; rcu_read_lock(); table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (table) { index = hash & table->mask; if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) WRITE_ONCE(table->ents[index], RPS_NO_CPU); } rcu_read_unlock(); } #endif /* CONFIG_RPS */ static inline bool rfs_is_needed(void) { #ifdef CONFIG_RPS return static_branch_unlikely(&rfs_needed); #else return false; #endif } static inline void sock_rps_record_flow_hash(__u32 hash) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_record_flow_hash(hash); #endif } static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_record_flow(sk); #endif } static inline void sock_rps_delete_flow(const struct sock *sk) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_delete_flow(sk); #endif } static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) { #ifdef CONFIG_RPS return ++sd->input_queue_tail; #else return 0; #endif } static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) { #ifdef CONFIG_RPS WRITE_ONCE(*dest, tail); #endif } static inline void rps_input_queue_head_add(struct softnet_data *sd, int val) { #ifdef CONFIG_RPS WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val); #endif } static inline void rps_input_queue_head_incr(struct softnet_data *sd) { rps_input_queue_head_add(sd, 1); } #endif /* _NET_RPS_H */ |
| 32 33 32 89 89 6 251 9 3 231 3 4 3 184 9 52 5 185 5 2 47 51 229 2 2 48 10 11 5 50 45 145 175 44 31 149 159 9 1 21 13 139 3 165 1 1 1 2 2 2 2 100 100 100 100 100 100 100 100 99 100 45 97 131 132 36 1 35 1 7 30 3 3 2 2 2 7 17 12 1 12 12 10 2 12 72 37 105 1 2 3 108 108 108 57 58 58 57 57 50 58 58 56 2 55 3 56 1 58 35 23 40 18 18 18 48 10 57 58 18 50 5 45 14 68 68 3 53 62 5 57 57 12 9 5 1 40 5 3 2 1 1 14 38 12 12 12 40 52 56 6 6 1 1 6 6 120 112 91 27 66 1 64 1 6 243 243 229 15 43 43 43 25 28 28 223 120 8 103 10 216 10 10 1 1 7 14 13 1 12 2 11 3 14 5 9 179 181 206 206 205 12 199 2 195 194 191 175 129 1 1 23 1 2 20 19 19 1 1 125 17 1 4 6 1 121 120 1 1 2 55 93 30 12 19 216 216 19 203 202 4 197 185 24 186 187 116 173 1 5 2 1 9 9 5 9 5 10 9 2 1 10 43 43 43 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 | // SPDX-License-Identifier: GPL-2.0-or-later /* * TCP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c * linux/net/ipv4/tcp_output.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. */ #include <linux/bottom_half.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/jiffies.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/indirect_call_wrapper.h> #include <net/aligned_data.h> #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> #include <net/inet6_connection_sock.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip6_checksum.h> #include <net/inet_ecn.h> #include <net/protocol.h> #include <net/xfrm.h> #include <net/snmp.h> #include <net/dsfield.h> #include <net/timewait_sock.h> #include <net/inet_common.h> #include <net/secure_seq.h> #include <net/hotdata.h> #include <net/busy_poll.h> #include <net/rstreason.h> #include <net/psp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <crypto/hash.h> #include <linux/scatterlist.h> #include <trace/events/tcp.h> static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #endif /* Helper returning the inet6 address from a given tcp socket. * It can be used in TCP stack instead of inet6_sk(sk). * This avoids a dereference and allow compiler optimizations. * It is a specialized version of inet6_sk_generic(). */ #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ struct tcp6_sock, tcp)->inet6) static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } static u32 tcp_v6_init_seq(const struct sk_buff *skb) { return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, tcp_hdr(skb)->dest, tcp_hdr(skb)->source); } static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { /* This check is replicated from tcp_v6_connect() and intended to * prevent BPF program called below from accessing bytes that are out * of the bound specified by user in addr_len. */ if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; sock_owned_by_me(sk); return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct ipv6_txoptions *opt; struct dst_entry *dst; struct flowi6 fl6; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) { if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), &usin->sin6_addr); else usin->sin6_addr = in6addr_loopback; } addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } if (tp->rx_opt.ts_recent_stamp && !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * TCP over IPv4 */ if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; fl6.flowi6_uid = sk_uid(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { saddr = &fl6.saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); if (err) goto failure; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, false, false); icsk->icsk_ext_hdr_len = psp_sk_overhead(sk); if (opt) icsk->icsk_ext_hdr_len += opt->opt_flen + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; sk_set_txhash(sk); if (likely(!tp->repair)) { if (!tp->write_seq) WRITE_ONCE(tp->write_seq, secure_tcpv6_seq(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) return err; if (err) goto late_failure; err = tcp_connect(sk); if (err) goto late_failure; return 0; late_failure: tcp_set_state(sk, TCP_CLOSE); inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; u32 mtu; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; mtu = READ_ONCE(tcp_sk(sk)->mtu_info); /* Drop requests trying to increase our current mss. * Check done in __ip6_rt_update_pmtu() is too late. */ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) return; dst = inet6_csk_update_pmtu(sk, mtu); if (!dst) return; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } } static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct net *net = dev_net_rcu(skb->dev); struct request_sock *fastopen; struct ipv6_pinfo *np; struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; bool fatal; int err; sk = __inet6_lookup_established(net, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), skb->dev->ifindex, inet6_sdif(skb)); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { /* To increase the counter of ignored icmps for TCP-AO */ tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } seq = ntohl(th->seq); fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { tcp_req_err(sk, seq, fatal); return 0; } if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { sock_put(sk); return 0; } bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } } tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { u32 mtu = ntohl(info); /* We are not interested in TCP_LISTEN and open_requests * (SYN-ACKs send out by Linux are always <576bytes so * they should go through unfragmented). */ if (sk->sk_state == TCP_LISTEN) goto out; if (!ip6_sk_accept_pmtu(sk)) goto out; if (mtu < IPV6_MIN_MTU) goto out; WRITE_ONCE(tp->mtu_info, mtu); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); goto out; } /* Might be for an request_sock */ switch (sk->sk_state) { case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * already accepted it is treated as a connected one below. */ if (fastopen && !fastopen->sk) break; ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) tcp_done_with_error(sk, err); else WRITE_ONCE(sk->sk_err_soft, err); goto out; case TCP_LISTEN: break; default: /* check if this ICMP message allows revert of backoff. * (see RFC 6069) */ if (!fastopen && type == ICMPV6_DEST_UNREACH && code == ICMPV6_NOROUTE) tcp_ld_RTO_revert(sk, seq); } if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } out: bh_unlock_sock(sk); sock_put(sk); return 0; } static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; u8 tclass; /* First, grab a route. */ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, IPPROTO_TCP)) == NULL) goto done; skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); if (skb) { tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; if (!INET_ECN_is_capable(tclass) && tcp_bpf_ca_needs_ecn((struct sock *)req)) tclass |= INET_ECN_ECT_0; rcu_read_lock(); opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } done: return err; } static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); consume_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr, int l3index) { return tcp_md5_do_lookup(sk, l3index, (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { int l3index; l3index = l3mdev_master_ifindex_by_index(sock_net(sk), addr_sk->sk_bound_dev_if); return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, l3index); } static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; bool l3flag; u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) return -EFAULT; if (sin6->sin6_family != AF_INET6) return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && prefixlen > 32)) return -EINVAL; } else { prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; } if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); if (dev && netif_is_l3_master(dev)) l3index = dev->ifindex; rcu_read_unlock(); /* ok to reference set/not set outside of rcu; * right now device MUST be an L3 master */ if (!dev || !l3index) return -EINVAL; } if (!cmd.tcpm_keylen) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, l3index, flags); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, AF_INET6, prefixlen, l3index, flags); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } addr = (union tcp_md5_addr *)&sin6->sin6_addr; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; struct tcphdr *_th; bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); _th = (struct tcphdr *)(bp + 1); memcpy(_th, th, sizeof(*th)); _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { struct tcp_sigpool hp; if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } static int tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); const struct in6_addr *saddr, *daddr; struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; daddr = &ip6h->daddr; } if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } #endif static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb, u32 tw_isn) { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ir_rmt_addr = LOOPBACK4_IPV6; ireq->ir_loc_addr = LOOPBACK4_IPV6; /* So that link locals have meaning */ if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); if (!tw_isn && (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn) { tcp_v6_init_req(req, sk, skb, tw_isn); if (security_inet_conn_request(sk, skb, req)) return NULL; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup_rsk, .ao_calc_key = tcp_v6_ao_calc_key_rsk, .ao_synack_hash = tcp_v6_ao_synack_hash, #endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_seq, .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); unsigned int tot_len = sizeof(struct tcphdr); struct sock *ctl_sk = net->ipv6.tcp_sk; const struct tcphdr *th = tcp_hdr(skb); __be32 mrst = 0, *topt; struct dst_entry *dst; struct sk_buff *buff; struct tcphdr *t1; struct flowi6 fl6; u32 mark = 0; if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) tot_len += sizeof(__be32); } #endif buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (!buff) return; skb_reserve(buff, MAX_TCP_HEADER); t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); t1->dest = th->source; t1->source = th->dest; t1->doff = tot_len / 4; t1->seq = htonl(seq); t1->ack_seq = htonl(ack); t1->ack = !rst || !th->ack; t1->rst = rst; t1->window = htons(win); topt = (__be32 *)(t1 + 1); if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *topt++ = htonl(tsval); *topt++ = htonl(tsecr); } if (mrst) *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif #ifdef CONFIG_TCP_AO if (tcp_key_is_ao(key)) { *topt++ = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | (key->rcv_next)); tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, key->traffic_key, (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, t1, key->sne); } #endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; fl6.saddr = ipv6_hdr(skb)->daddr; fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); else { if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; fl6.flowi6_oif = oif; } if (sk) { /* unconstify the socket only to attach it to buff with care. */ skb_set_owner_edemux(buff, (struct sock *)sk); psp_reply_set_decrypted(sk, buff); if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ if (sk && sk->sk_state != TCP_TIME_WAIT) dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ else dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass, priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); return; } kfree_skb(buff); } static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); const __u8 *md5_hash_location = NULL; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) bool allocated_traffic_key = false; #endif const struct tcp_ao_hdr *aoh; struct tcp_key key = {}; u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; #ifdef CONFIG_TCP_MD5SIG unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; #endif if (th->rst) return; /* If sk not NULL, it means we did a successful lookup and incoming * route had to be correct. prequeue might have dropped our dst. */ if (!sk && !ipv6_unicast_destination(skb)) return; net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); #endif #ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; /* sdif set, means packet ingressed via a device * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. * we are not loose security here: * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), dif, sdif); if (!sk1) goto out; /* sdif set, means packet ingressed via a device * in an L3 domain and dif is set to it. */ l3index = tcp_v6_sdif(skb) ? dif : 0; key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); if (!key.md5_key) goto out; key.type = TCP_KEY_MD5; genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif if (th->ack) seq = ntohl(th->ack_seq); else ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); #ifdef CONFIG_TCP_AO if (aoh) { int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, &key.ao_key, &key.traffic_key, &allocated_traffic_key, &key.rcv_next, &key.sne)) goto out; key.type = TCP_KEY_AO; } #endif if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); priority = inet_twsk(sk)->tw_priority; txhash = inet_twsk(sk)->tw_txhash; } } else { if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) label = ip6_flowlabel(ipv6h); } trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, label, priority, txhash, &key); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: if (allocated_traffic_key) kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); u8 tclass = tw->tw_tclass; struct tcp_key key = {}; if (tw_status == TCP_TW_ACK_OOW) tclass &= ~INET_ECN_MASK; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; if (static_branch_unlikely(&tcp_ao_needed.key)) { /* FIXME: the segment to-be-acked is not verified yet */ ao_info = rcu_dereference(tcptw->ao_info); if (ao_info) { const struct tcp_ao_hdr *aoh; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto out; if (aoh) key.ao_key = tcp_ao_established_key(sk, ao_info, aoh->rnext_keyid, -1); } } if (key.ao_key) { struct tcp_ao_key *rnext_key; key.traffic_key = snd_other_key(key.ao_key); /* rcv_next switches to our rcv_next */ rnext_key = READ_ONCE(ao_info->rnext_key); key.rcv_next = rnext_key->rcvid; key.sne = READ_ONCE(ao_info->snd_sne); key.type = TCP_KEY_AO; #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; #endif } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, READ_ONCE(tcptw->tw_rcv_nxt), tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO out: #endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct tcp_key key = {}; #ifdef CONFIG_TCP_AO if (static_branch_unlikely(&tcp_ao_needed.key) && tcp_rsk_used_ao(req)) { const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; const struct tcp_ao_hdr *aoh; int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) return; if (!aoh) return; key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, aoh->rnext_keyid, -1); if (unlikely(!key.ao_key)) { /* Send ACK with any matching MKT for the peer */ key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, -1, -1); /* Matching key disappeared (user removed the key?) * let the handshake timeout. */ if (!key.ao_key) { net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", addr, ntohs(tcp_hdr(skb)->source), &ipv6_hdr(skb)->daddr, ntohs(tcp_hdr(skb)->dest)); return; } } key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); if (!key.traffic_key) return; key.type = TCP_KEY_AO; key.rcv_next = aoh->keyid; tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; #endif } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), req->ts_recent, sk->sk_bound_dev_if, &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) kfree(key.traffic_key); } static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; } u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie) { u16 mss = 0; #ifdef CONFIG_SYN_COOKIES mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, th); if (mss) { *cookie = __cookie_v6_init_sequence(iph, th, &mss); tcp_synq_overflow(sk); } #endif return mss; } static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); return 0; } return tcp_conn_request(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, skb); drop: tcp_listendrop(sk); return 0; /* don't send reset */ } static void tcp_v6_restore_cb(struct sk_buff *skb) { /* We need to move header back to the beginning if xfrm6_policy_check() * and tcp_v6_fill_cb() are going to be called again. * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. */ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, sizeof(struct inet6_skb_parm)); } static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; int l3index; #endif struct flowi6 fl6; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (!newsk) return NULL; inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, tcp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) goto exit_overflow; if (!dst) { dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) goto exit; } newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) goto exit_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, tcp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ newsk->sk_gso_type = SKB_GSO_TCPV6; inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ip6_dst_store(newsk, dst, false, false); newnp->saddr = ireq->ir_v6_loc_addr; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) Yes, keeping reference count would be much more clever, but we make one more one thing there: reattach optmem to newsk. */ opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); if (!tcp_rsk_used_ao(req)) { /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); if (key) { const union tcp_md5_addr *addr; addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) goto put_and_exit; } } #endif #ifdef CONFIG_TCP_AO /* Copy over tcp_ao_info if any */ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) goto put_and_exit; /* OOM */ #endif if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); } } else { if (!req_unhash && found_dup_sk) { /* This code path should only be executed in the * syncookie case only */ bh_unlock_sock(newsk); sock_put(newsk); newsk = NULL; } } return newsk; exit_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); exit_nonewsk: dst_release(dst); exit: tcp_listendrop(sk); return NULL; put_and_exit: inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto exit; } INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, u32)); /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. * This is because we cannot sleep with the original spinlock * held. */ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; enum skb_drop_reason reason; struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. From backlog it always goes here. Kerboom... Fortunately, tcp_rcv_established and rcv_established handle them correctly, but it is not case with tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK */ if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); reason = psp_sk_rx_policy_check(sk, skb); if (reason) goto err_discard; /* * socket locking is here for SMP purposes as backlog rcv * is currently called with bh processing disabled. */ /* Do Stevens' IPV6_PKTOPTIONS. Yes, guys, it is the only place in our code, where we may make it not affecting IPv4. The rest of code is protocol independent, and I do not like idea to uglify IPv4. Actually, all the idea behind IPV6_PKTOPTIONS looks not very well thought. For now we latch options, received in the last packet, enqueued by tcp. Feel free to propose better solution. --ANK (980728) */ if (np->rxopt.all && sk->sk_state != TCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; dst = rcu_dereference_protected(sk->sk_rx_dst, lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); } } tcp_rcv_established(sk, skb); if (opt_skb) goto ipv6_pktoptions; return 0; } if (tcp_checksum_complete(skb)) goto csum_err; if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); if (nsk != sk) { if (nsk) { reason = tcp_child_process(sk, nsk, skb); if (reason) goto reset; } return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); if (reason) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; reset: tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: if (opt_skb) __kfree_skb(opt_skb); sk_skb_reason_drop(sk, skb, reason); return 0; csum_err: reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); err_discard: TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; ipv6_pktoptions: /* Do you ask, what is it? 1. skb was enqueued by tcp. 2. skb is added to tail of read queue, rather than out of order. 3. socket is not in passive state. 4. Finally, it really contains options, which user wants to receive. */ tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); opt_skb = xchg(&np->pktoptions, NULL); } } consume_skb(opt_skb); return 0; } static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, const struct tcphdr *th) { /* This is tricky: we move IP6CB at its correct location into * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because * _decode_session6() uses IP6CB(). * barrier() makes sure compiler won't play aliasing games. */ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), sizeof(struct inet6_skb_parm)); barrier(); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; enum tcp_tw_status tw_status; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; struct sock *sk = NULL; bool refcounted; int ret; u32 isn; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; /* * Count it even if it's bad. */ __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; th = (const struct tcphdr *)skb->data; if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; } if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); lookup: sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) goto no_tcp_socket; if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else drop_reason = tcp_inbound_hash(sk, req, skb, &hdr->saddr, &hdr->daddr, AF_INET6, dif, sdif); if (drop_reason) { sk_drops_skbadd(sk, skb); reqsk_put(req); goto discard_it; } if (tcp_checksum_complete(skb)) { reqsk_put(req); goto csum_error; } if (unlikely(sk->sk_state != TCP_LISTEN)) { nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); if (!nsk) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } sk = nsk; /* reuseport_migrate_sock() has already held one sk_refcnt * before returning. */ } else { sock_hold(sk); } refcounted = true; nsk = NULL; if (!tcp_filter(sk, skb, &drop_reason)) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen, &drop_reason); } if (!nsk) { reqsk_put(req); if (req_stolen) { /* Another cpu got exclusive access to req * and created a full blown socket. * Try to feed this packet to this socket * instead of discarding it. */ tcp_v6_restore_cb(skb); sock_put(sk); goto lookup; } goto discard_and_relse; } nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { enum sk_rst_reason rst_reason; rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); return 0; } } process: if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); drop_reason = SKB_DROP_REASON_TCP_MINTTL; goto discard_and_relse; } } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto discard_and_relse; } drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; nf_reset_ct(skb); if (tcp_filter(sk, skb, &drop_reason)) goto discard_and_relse; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); skb->dev = NULL; if (sk->sk_state == TCP_LISTEN) { ret = tcp_v6_do_rcv(sk, skb); goto put_and_return; } sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb, &drop_reason)) goto discard_and_relse; } bh_unlock_sock(sk); put_and_return: if (refcounted) sock_put(sk); return ret ? -1 : 0; no_tcp_socket: drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; tcp_v6_fill_cb(skb, hdr, th); if (tcp_checksum_complete(skb)) { csum_error: drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: SKB_DR_OR(drop_reason, NOT_SPECIFIED); sk_skb_reason_drop(sk, skb, drop_reason); return 0; discard_and_relse: sk_drops_skbadd(sk, skb); if (refcounted) sock_put(sk); goto discard_it; do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; inet_twsk_put(inet_twsk(sk)); goto discard_it; } tcp_v6_fill_cb(skb, hdr, th); if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; } tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, &drop_reason); switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2; sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif_l3_slave(skb), sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; __this_cpu_write(tcp_tw_isn, isn); goto process; } drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb); if (drop_reason) break; } /* to ACK */ fallthrough; case TCP_TW_ACK: case TCP_TW_ACK_OOW: tcp_v6_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: ; } goto discard_it; } void tcp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); const struct ipv6hdr *hdr; const struct tcphdr *th; struct sock *sk; if (skb->pkt_type != PACKET_HOST) return; if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) return; hdr = ipv6_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; /* Note : We use inet6_iif() here, not tcp_v6_iif() */ sk = __inet6_lookup_established(net, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), inet6_iif(skb), inet6_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst && sk->sk_rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } } static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), }; INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); } const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .mtu_reduced = tcp_v6_mtu_reduced, }; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, .calc_ao_hash = tcp_v6_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, #endif }; #endif /* * TCP over IPv4 via INET6 API */ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .mtu_reduced = tcp_v4_mtu_reduced, }; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, .calc_ao_hash = tcp_v4_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, #endif }; static void tcp6_destruct_sock(struct sock *sk) { tcp_md5_destruct_sock(sk); tcp_ao_destroy_sock(sk, false); inet6_sock_destruct(sk); } #endif /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ static int tcp_v6_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_init_sock(sk); icsk->icsk_af_ops = &ipv6_specific; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; sk->sk_destruct = tcp6_destruct_sock; #endif return 0; } #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, const struct request_sock *req, int i) { long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; if (ttd < 0) ttd = 0; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], inet_rsk(req)->ir_num, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, from_kuid_munged(seq_user_ns(seq), sk_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); } static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) { const struct in6_addr *dest, *src; __u16 destp, srcp; int timer_active; unsigned long timer_expires; const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; u8 icsk_pending; int rx_queue; int state; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); icsk_pending = smp_load_acquire(&icsk->icsk_pending); if (icsk_pending == ICSK_TIME_RETRANS || icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk_timeout(icsk); } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk_timeout(icsk); } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; } else { timer_active = 0; timer_expires = jiffies; } state = inet_sk_state_load(sp); if (state == TCP_LISTEN) rx_queue = READ_ONCE(sp->sk_ack_backlog); else /* Because we don't lock the socket, * we might find a transient negative value. */ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq), 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, state, READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), READ_ONCE(icsk->icsk_retransmits), from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), READ_ONCE(icsk->icsk_probes_out), sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, READ_ONCE(tw->tw_substate), 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, refcount_read(&tw->tw_refcnt), tw); } static int tcp6_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; struct sock *sk = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, " sl " "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" " uid timeout inode\n"); goto out; } st = seq->private; if (sk->sk_state == TCP_TIME_WAIT) get_timewait6_sock(seq, v, st->num); else if (sk->sk_state == TCP_NEW_SYN_RECV) get_openreq6(seq, v, st->num); else get_tcp6_sock(seq, v, st->num); out: return 0; } static const struct seq_operations tcp6_seq_ops = { .show = tcp6_seq_show, .start = tcp_seq_start, .next = tcp_seq_next, .stop = tcp_seq_stop, }; static struct tcp_seq_afinfo tcp6_seq_afinfo = { .family = AF_INET6, }; int __net_init tcp6_proc_init(struct net *net) { if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) return -ENOMEM; return 0; } void tcp6_proc_exit(struct net *net) { remove_proc_entry("tcp6", net->proc_net); } #endif struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, .close = tcp_close, .pre_connect = tcp_v6_pre_connect, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .splice_eof = tcp_splice_eof, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .put_port = inet_put_port, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = tcp_bpf_update_proto, #endif .enter_memory_pressure = tcp_enter_memory_pressure, .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; EXPORT_SYMBOL_GPL(tcpv6_prot); static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; static int __net_init tcpv6_net_init(struct net *net) { int res; res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, SOCK_RAW, IPPROTO_TCP, net); if (!res) net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; return res; } static void __net_exit tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); } static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, }; int __init tcpv6_init(void) { int ret; net_hotdata.tcpv6_protocol = (struct inet6_protocol) { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); if (ret) goto out; /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); if (ret) goto out_tcpv6_protocol; ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; ret = mptcpv6_init(); if (ret) goto out_tcpv6_pernet_subsys; out: return ret; out_tcpv6_pernet_subsys: unregister_pernet_subsys(&tcpv6_net_ops); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); goto out; } void tcpv6_exit(void) { unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); } |
| 21 20 6 11 49 49 28 38 21 17 21 17 20 49 15 22 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | // SPDX-License-Identifier: GPL-2.0 /* dvb-usb-remote.c is part of the DVB USB library. * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * see dvb-usb-init.c for copyright information. * * This file contains functions for initializing the input-device and for handling remote-control-queries. */ #include "dvb-usb-common.h" #include <linux/usb/input.h> static unsigned int legacy_dvb_usb_get_keymap_index(const struct input_keymap_entry *ke, struct rc_map_table *keymap, unsigned int keymap_size) { unsigned int index; unsigned int scancode; if (ke->flags & INPUT_KEYMAP_BY_INDEX) { index = ke->index; } else { if (input_scancode_to_scalar(ke, &scancode)) return keymap_size; /* See if we can match the raw key code. */ for (index = 0; index < keymap_size; index++) if (keymap[index].scancode == scancode) break; /* See if there is an unused hole in the map */ if (index >= keymap_size) { for (index = 0; index < keymap_size; index++) { if (keymap[index].keycode == KEY_RESERVED || keymap[index].keycode == KEY_UNKNOWN) { break; } } } } return index; } static int legacy_dvb_usb_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { struct dvb_usb_device *d = input_get_drvdata(dev); struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; unsigned int keymap_size = d->props.rc.legacy.rc_map_size; unsigned int index; index = legacy_dvb_usb_get_keymap_index(ke, keymap, keymap_size); if (index >= keymap_size) return -EINVAL; ke->keycode = keymap[index].keycode; if (ke->keycode == KEY_UNKNOWN) ke->keycode = KEY_RESERVED; ke->len = sizeof(keymap[index].scancode); memcpy(&ke->scancode, &keymap[index].scancode, ke->len); ke->index = index; return 0; } static int legacy_dvb_usb_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct dvb_usb_device *d = input_get_drvdata(dev); struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; unsigned int keymap_size = d->props.rc.legacy.rc_map_size; unsigned int index; index = legacy_dvb_usb_get_keymap_index(ke, keymap, keymap_size); /* * FIXME: Currently, it is not possible to increase the size of * scancode table. For it to happen, one possibility * would be to allocate a table with key_map_size + 1, * copying data, appending the new key on it, and freeing * the old one - or maybe just allocating some spare space */ if (index >= keymap_size) return -EINVAL; *old_keycode = keymap[index].keycode; keymap->keycode = ke->keycode; __set_bit(ke->keycode, dev->keybit); if (*old_keycode != KEY_RESERVED) { __clear_bit(*old_keycode, dev->keybit); for (index = 0; index < keymap_size; index++) { if (keymap[index].keycode == *old_keycode) { __set_bit(*old_keycode, dev->keybit); break; } } } return 0; } /* Remote-control poll function - called every dib->rc_query_interval ms to see * whether the remote control has received anything. * * TODO: Fix the repeat rate of the input device. */ static void legacy_dvb_usb_read_remote_control(struct work_struct *work) { struct dvb_usb_device *d = container_of(work, struct dvb_usb_device, rc_query_work.work); u32 event; int state; /* TODO: need a lock here. We can simply skip checking for the remote control if we're busy. */ /* when the parameter has been set to 1 via sysfs while the driver was running */ if (dvb_usb_disable_rc_polling) return; if (d->props.rc.legacy.rc_query(d,&event,&state)) { err("error while querying for an remote control event."); goto schedule; } switch (state) { case REMOTE_NO_KEY_PRESSED: break; case REMOTE_KEY_PRESSED: deb_rc("key pressed\n"); d->last_event = event; input_event(d->input_dev, EV_KEY, event, 1); input_sync(d->input_dev); input_event(d->input_dev, EV_KEY, d->last_event, 0); input_sync(d->input_dev); break; case REMOTE_KEY_REPEAT: deb_rc("key repeated\n"); input_event(d->input_dev, EV_KEY, event, 1); input_sync(d->input_dev); input_event(d->input_dev, EV_KEY, d->last_event, 0); input_sync(d->input_dev); break; default: break; } /* improved repeat handling ??? switch (state) { case REMOTE_NO_KEY_PRESSED: deb_rc("NO KEY PRESSED\n"); if (d->last_state != REMOTE_NO_KEY_PRESSED) { deb_rc("releasing event %d\n",d->last_event); input_event(d->rc_input_dev, EV_KEY, d->last_event, 0); input_sync(d->rc_input_dev); } d->last_state = REMOTE_NO_KEY_PRESSED; d->last_event = 0; break; case REMOTE_KEY_PRESSED: deb_rc("KEY PRESSED\n"); deb_rc("pressing event %d\n",event); input_event(d->rc_input_dev, EV_KEY, event, 1); input_sync(d->rc_input_dev); d->last_event = event; d->last_state = REMOTE_KEY_PRESSED; break; case REMOTE_KEY_REPEAT: deb_rc("KEY_REPEAT\n"); if (d->last_state != REMOTE_NO_KEY_PRESSED) { deb_rc("repeating event %d\n",d->last_event); input_event(d->rc_input_dev, EV_KEY, d->last_event, 2); input_sync(d->rc_input_dev); d->last_state = REMOTE_KEY_REPEAT; } default: break; } */ schedule: schedule_delayed_work(&d->rc_query_work,msecs_to_jiffies(d->props.rc.legacy.rc_interval)); } static int legacy_dvb_usb_remote_init(struct dvb_usb_device *d) { int i, err, rc_interval; struct input_dev *input_dev; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; input_dev->evbit[0] = BIT_MASK(EV_KEY); input_dev->name = "IR-receiver inside an USB DVB receiver"; input_dev->phys = d->rc_phys; usb_to_input_id(d->udev, &input_dev->id); input_dev->dev.parent = &d->udev->dev; d->input_dev = input_dev; d->rc_dev = NULL; input_dev->getkeycode = legacy_dvb_usb_getkeycode; input_dev->setkeycode = legacy_dvb_usb_setkeycode; /* set the bits for the keys */ deb_rc("key map size: %d\n", d->props.rc.legacy.rc_map_size); for (i = 0; i < d->props.rc.legacy.rc_map_size; i++) { deb_rc("setting bit for event %d item %d\n", d->props.rc.legacy.rc_map_table[i].keycode, i); set_bit(d->props.rc.legacy.rc_map_table[i].keycode, input_dev->keybit); } /* setting these two values to non-zero, we have to manage key repeats */ input_dev->rep[REP_PERIOD] = d->props.rc.legacy.rc_interval; input_dev->rep[REP_DELAY] = d->props.rc.legacy.rc_interval + 150; input_set_drvdata(input_dev, d); err = input_register_device(input_dev); if (err) input_free_device(input_dev); rc_interval = d->props.rc.legacy.rc_interval; INIT_DELAYED_WORK(&d->rc_query_work, legacy_dvb_usb_read_remote_control); info("schedule remote query interval to %d msecs.", rc_interval); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(rc_interval)); d->state |= DVB_USB_STATE_REMOTE; return err; } /* Remote-control poll function - called every dib->rc_query_interval ms to see * whether the remote control has received anything. * * TODO: Fix the repeat rate of the input device. */ static void dvb_usb_read_remote_control(struct work_struct *work) { struct dvb_usb_device *d = container_of(work, struct dvb_usb_device, rc_query_work.work); int err; /* TODO: need a lock here. We can simply skip checking for the remote control if we're busy. */ /* when the parameter has been set to 1 via sysfs while the * driver was running, or when bulk mode is enabled after IR init */ if (dvb_usb_disable_rc_polling || d->props.rc.core.bulk_mode) return; err = d->props.rc.core.rc_query(d); if (err) err("error %d while querying for an remote control event.", err); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(d->props.rc.core.rc_interval)); } static int rc_core_dvb_usb_remote_init(struct dvb_usb_device *d) { int err, rc_interval; struct rc_dev *dev; dev = rc_allocate_device(d->props.rc.core.driver_type); if (!dev) return -ENOMEM; dev->driver_name = d->props.rc.core.module_name; dev->map_name = d->props.rc.core.rc_codes; dev->change_protocol = d->props.rc.core.change_protocol; dev->allowed_protocols = d->props.rc.core.allowed_protos; usb_to_input_id(d->udev, &dev->input_id); dev->device_name = d->desc->name; dev->input_phys = d->rc_phys; dev->dev.parent = &d->udev->dev; dev->priv = d; dev->scancode_mask = d->props.rc.core.scancode_mask; err = rc_register_device(dev); if (err < 0) { rc_free_device(dev); return err; } d->input_dev = NULL; d->rc_dev = dev; if (!d->props.rc.core.rc_query || d->props.rc.core.bulk_mode) return 0; /* Polling mode - initialize a work queue for handling it */ INIT_DELAYED_WORK(&d->rc_query_work, dvb_usb_read_remote_control); rc_interval = d->props.rc.core.rc_interval; info("schedule remote query interval to %d msecs.", rc_interval); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(rc_interval)); return 0; } int dvb_usb_remote_init(struct dvb_usb_device *d) { int err; if (dvb_usb_disable_rc_polling) return 0; if (d->props.rc.legacy.rc_map_table && d->props.rc.legacy.rc_query) d->props.rc.mode = DVB_RC_LEGACY; else if (d->props.rc.core.rc_codes) d->props.rc.mode = DVB_RC_CORE; else return 0; usb_make_path(d->udev, d->rc_phys, sizeof(d->rc_phys)); strlcat(d->rc_phys, "/ir0", sizeof(d->rc_phys)); /* Start the remote-control polling. */ if (d->props.rc.legacy.rc_interval < 40) d->props.rc.legacy.rc_interval = 100; /* default */ if (d->props.rc.mode == DVB_RC_LEGACY) err = legacy_dvb_usb_remote_init(d); else err = rc_core_dvb_usb_remote_init(d); if (err) return err; d->state |= DVB_USB_STATE_REMOTE; return 0; } int dvb_usb_remote_exit(struct dvb_usb_device *d) { if (d->state & DVB_USB_STATE_REMOTE) { cancel_delayed_work_sync(&d->rc_query_work); if (d->props.rc.mode == DVB_RC_LEGACY) input_unregister_device(d->input_dev); else rc_unregister_device(d->rc_dev); } d->state &= ~DVB_USB_STATE_REMOTE; return 0; } #define DVB_USB_RC_NEC_EMPTY 0x00 #define DVB_USB_RC_NEC_KEY_PRESSED 0x01 #define DVB_USB_RC_NEC_KEY_REPEATED 0x02 int dvb_usb_nec_rc_key_to_event(struct dvb_usb_device *d, u8 keybuf[5], u32 *event, int *state) { int i; struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; *event = 0; *state = REMOTE_NO_KEY_PRESSED; switch (keybuf[0]) { case DVB_USB_RC_NEC_EMPTY: break; case DVB_USB_RC_NEC_KEY_PRESSED: if ((u8) ~keybuf[1] != keybuf[2] || (u8) ~keybuf[3] != keybuf[4]) { deb_err("remote control checksum failed.\n"); break; } /* See if we can match the raw key code. */ for (i = 0; i < d->props.rc.legacy.rc_map_size; i++) if (rc5_custom(&keymap[i]) == keybuf[1] && rc5_data(&keymap[i]) == keybuf[3]) { *event = keymap[i].keycode; *state = REMOTE_KEY_PRESSED; return 0; } deb_err("key mapping failed - no appropriate key found in keymapping\n"); break; case DVB_USB_RC_NEC_KEY_REPEATED: *state = REMOTE_KEY_REPEAT; break; default: deb_err("unknown type of remote status: %d\n",keybuf[0]); break; } return 0; } EXPORT_SYMBOL(dvb_usb_nec_rc_key_to_event); |
| 10 1625 1629 102 1630 1620 1622 1817 1785 9 7 7 27 25 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/macsec.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, METADATA_MACSEC, METADATA_XFRM, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct macsec_info { sci_t sci; }; struct xfrm_md_info { u32 if_id; int link; struct dst_entry *dst_orig; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; struct macsec_info macsec_info; struct xfrm_md_info xfrm_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) { return (struct xfrm_md_info *)lwt->data; } static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; dst = skb_dst(skb); if (dst && dst->lwtstate && dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) return lwt_xfrm_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); case METADATA_MACSEC: return memcmp(&a->u.macsec_info, &b->u.macsec_info, sizeof(a->u.macsec_info)); case METADATA_XFRM: return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, sizeof(a->u.xfrm_info)); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { int ret; ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); if (ret) { metadata_dst_free(new_md); return ERR_PTR(ret); } } #endif skb_dst_drop(skb); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); if (tun_dst && (iph->frag_off & htons(IP_DF))) __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_dst->u.tun_info.key.tun_flags); return tun_dst; } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ |
| 5 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 | // SPDX-License-Identifier: GPL-2.0-or-later /* * HID Driver for ELAN Touchpad * * Currently only supports touchpad found on HP Pavilion X2 10 * * Copyright (c) 2016 Alexandrov Stanislav <neko@nya.ai> */ #include <linux/hid.h> #include <linux/input/mt.h> #include <linux/leds.h> #include <linux/module.h> #include <linux/usb.h> #include "hid-ids.h" #define ELAN_MT_I2C 0x5d #define ELAN_SINGLE_FINGER 0x81 #define ELAN_MT_FIRST_FINGER 0x82 #define ELAN_MT_SECOND_FINGER 0x83 #define ELAN_INPUT_REPORT_SIZE 8 #define ELAN_I2C_REPORT_SIZE 32 #define ELAN_FINGER_DATA_LEN 5 #define ELAN_MAX_FINGERS 5 #define ELAN_MAX_PRESSURE 255 #define ELAN_TP_USB_INTF 1 #define ELAN_FEATURE_REPORT 0x0d #define ELAN_FEATURE_SIZE 5 #define ELAN_PARAM_MAX_X 6 #define ELAN_PARAM_MAX_Y 7 #define ELAN_PARAM_RES 8 #define ELAN_MUTE_LED_REPORT 0xBC #define ELAN_LED_REPORT_SIZE 8 #define ELAN_HAS_LED BIT(0) struct elan_drvdata { struct input_dev *input; u8 prev_report[ELAN_INPUT_REPORT_SIZE]; struct led_classdev mute_led; u8 mute_led_state; u16 max_x; u16 max_y; u16 res_x; u16 res_y; }; static int is_not_elan_touchpad(struct hid_device *hdev) { if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != ELAN_TP_USB_INTF); } return 0; } static int elan_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if (is_not_elan_touchpad(hdev)) return 0; if (field->report->id == ELAN_SINGLE_FINGER || field->report->id == ELAN_MT_FIRST_FINGER || field->report->id == ELAN_MT_SECOND_FINGER || field->report->id == ELAN_MT_I2C) return -1; return 0; } static int elan_get_device_param(struct hid_device *hdev, unsigned char *dmabuf, unsigned char param) { int ret; dmabuf[0] = ELAN_FEATURE_REPORT; dmabuf[1] = 0x05; dmabuf[2] = 0x03; dmabuf[3] = param; dmabuf[4] = 0x01; ret = hid_hw_raw_request(hdev, ELAN_FEATURE_REPORT, dmabuf, ELAN_FEATURE_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != ELAN_FEATURE_SIZE) { hid_err(hdev, "Set report error for parm %d: %d\n", param, ret); return ret; } ret = hid_hw_raw_request(hdev, ELAN_FEATURE_REPORT, dmabuf, ELAN_FEATURE_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != ELAN_FEATURE_SIZE) { hid_err(hdev, "Get report error for parm %d: %d\n", param, ret); return ret; } return 0; } static unsigned int elan_convert_res(char val) { /* * (value from firmware) * 10 + 790 = dpi * dpi * 10 / 254 = dots/mm */ return (val * 10 + 790) * 10 / 254; } static int elan_get_device_params(struct hid_device *hdev) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); unsigned char *dmabuf; int ret; dmabuf = kmalloc(ELAN_FEATURE_SIZE, GFP_KERNEL); if (!dmabuf) return -ENOMEM; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_MAX_X); if (ret) goto err; drvdata->max_x = (dmabuf[4] << 8) | dmabuf[3]; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_MAX_Y); if (ret) goto err; drvdata->max_y = (dmabuf[4] << 8) | dmabuf[3]; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_RES); if (ret) goto err; drvdata->res_x = elan_convert_res(dmabuf[3]); drvdata->res_y = elan_convert_res(dmabuf[4]); err: kfree(dmabuf); return ret; } static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) { int ret; struct input_dev *input; struct elan_drvdata *drvdata = hid_get_drvdata(hdev); if (is_not_elan_touchpad(hdev)) return 0; ret = elan_get_device_params(hdev); if (ret) return ret; input = devm_input_allocate_device(&hdev->dev); if (!input) return -ENOMEM; input->name = "Elan Touchpad"; input->phys = hdev->phys; input->uniq = hdev->uniq; input->id.bustype = hdev->bus; input->id.vendor = hdev->vendor; input->id.product = hdev->product; input->id.version = hdev->version; input->dev.parent = &hdev->dev; input_set_abs_params(input, ABS_MT_POSITION_X, 0, drvdata->max_x, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, drvdata->max_y, 0, 0); input_set_abs_params(input, ABS_MT_PRESSURE, 0, ELAN_MAX_PRESSURE, 0, 0); __set_bit(BTN_LEFT, input->keybit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER); if (ret) { hid_err(hdev, "Failed to init elan MT slots: %d\n", ret); return ret; } input_abs_set_res(input, ABS_X, drvdata->res_x); input_abs_set_res(input, ABS_Y, drvdata->res_y); ret = input_register_device(input); if (ret) { hid_err(hdev, "Failed to register elan input device: %d\n", ret); input_mt_destroy_slots(input); return ret; } drvdata->input = input; return 0; } static void elan_report_mt_slot(struct elan_drvdata *drvdata, u8 *data, unsigned int slot_num) { struct input_dev *input = drvdata->input; int x, y, p; bool active = !!data; input_mt_slot(input, slot_num); input_mt_report_slot_state(input, MT_TOOL_FINGER, active); if (active) { x = ((data[0] & 0xF0) << 4) | data[1]; y = drvdata->max_y - (((data[0] & 0x07) << 8) | data[2]); p = data[4]; input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); input_report_abs(input, ABS_MT_PRESSURE, p); } } static void elan_usb_report_input(struct elan_drvdata *drvdata, u8 *data) { int i; struct input_dev *input = drvdata->input; /* * There is 3 types of reports: for single touch, * for multitouch - first finger and for multitouch - second finger * * packet structure for ELAN_SINGLE_FINGER and ELAN_MT_FIRST_FINGER: * * byte 1: 1 0 0 0 0 0 0 1 // 0x81 or 0x82 * byte 2: 0 0 0 0 0 0 0 0 // looks like unused * byte 3: f5 f4 f3 f2 f1 0 0 L * byte 4: x12 x11 x10 x9 0? y11 y10 y9 * byte 5: x8 x7 x6 x5 x4 x3 x2 x1 * byte 6: y8 y7 y6 y5 y4 y3 y2 y1 * byte 7: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 8: p8 p7 p6 p5 p4 p3 p2 p1 * * packet structure for ELAN_MT_SECOND_FINGER: * * byte 1: 1 0 0 0 0 0 1 1 // 0x83 * byte 2: x12 x11 x10 x9 0 y11 y10 y9 * byte 3: x8 x7 x6 x5 x4 x3 x2 x1 * byte 4: y8 y7 y6 y5 y4 y3 y2 y1 * byte 5: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 6: p8 p7 p6 p5 p4 p3 p2 p1 * byte 7: 0 0 0 0 0 0 0 0 * byte 8: 0 0 0 0 0 0 0 0 * * f5-f1: finger touch bits * L: clickpad button * sy / sx: finger width / height expressed in traces, the total number * of traces can be queried by doing a HID_REQ_SET_REPORT * { 0x0d, 0x05, 0x03, 0x05, 0x01 } followed by a GET, in the * returned buf, buf[3]=no-x-traces, buf[4]=no-y-traces. * p: pressure */ if (data[0] == ELAN_SINGLE_FINGER) { for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (data[2] & BIT(i + 3)) elan_report_mt_slot(drvdata, data + 3, i); else elan_report_mt_slot(drvdata, NULL, i); } input_report_key(input, BTN_LEFT, data[2] & 0x01); } /* * When touched with two fingers Elan touchpad will emit two HID reports * first is ELAN_MT_FIRST_FINGER and second is ELAN_MT_SECOND_FINGER * we will save ELAN_MT_FIRST_FINGER report and wait for * ELAN_MT_SECOND_FINGER to finish multitouch */ if (data[0] == ELAN_MT_FIRST_FINGER) { memcpy(drvdata->prev_report, data, sizeof(drvdata->prev_report)); return; } if (data[0] == ELAN_MT_SECOND_FINGER) { int first = 0; u8 *prev_report = drvdata->prev_report; if (prev_report[0] != ELAN_MT_FIRST_FINGER) return; for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (prev_report[2] & BIT(i + 3)) { if (!first) { first = 1; elan_report_mt_slot(drvdata, prev_report + 3, i); } else { elan_report_mt_slot(drvdata, data + 1, i); } } else { elan_report_mt_slot(drvdata, NULL, i); } } input_report_key(input, BTN_LEFT, prev_report[2] & 0x01); } input_mt_sync_frame(input); input_sync(input); } static void elan_i2c_report_input(struct elan_drvdata *drvdata, u8 *data) { struct input_dev *input = drvdata->input; u8 *finger_data; int i; /* * Elan MT touchpads in i2c mode send finger data in the same format * as in USB mode, but then with all fingers in a single packet. * * packet structure for ELAN_MT_I2C: * * byte 1: 1 0 0 1 1 1 0 1 // 0x5d * byte 2: f5 f4 f3 f2 f1 0 0 L * byte 3: x12 x11 x10 x9 0? y11 y10 y9 * byte 4: x8 x7 x6 x5 x4 x3 x2 x1 * byte 5: y8 y7 y6 y5 y4 y3 y2 y1 * byte 6: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 7: p8 p7 p6 p5 p4 p3 p2 p1 * byte 8-12: Same as byte 3-7 for second finger down * byte 13-17: Same as byte 3-7 for third finger down * byte 18-22: Same as byte 3-7 for fourth finger down * byte 23-27: Same as byte 3-7 for fifth finger down */ finger_data = data + 2; for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (data[1] & BIT(i + 3)) { elan_report_mt_slot(drvdata, finger_data, i); finger_data += ELAN_FINGER_DATA_LEN; } else { elan_report_mt_slot(drvdata, NULL, i); } } input_report_key(input, BTN_LEFT, data[1] & 0x01); input_mt_sync_frame(input); input_sync(input); } static int elan_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); if (is_not_elan_touchpad(hdev)) return 0; if (data[0] == ELAN_SINGLE_FINGER || data[0] == ELAN_MT_FIRST_FINGER || data[0] == ELAN_MT_SECOND_FINGER) { if (size == ELAN_INPUT_REPORT_SIZE) { elan_usb_report_input(drvdata, data); return 1; } } if (data[0] == ELAN_MT_I2C && size == ELAN_I2C_REPORT_SIZE) { elan_i2c_report_input(drvdata, data); return 1; } return 0; } static int elan_start_multitouch(struct hid_device *hdev) { int ret; /* * This byte sequence will enable multitouch mode and disable * mouse emulation */ static const unsigned char buf[] = { 0x0D, 0x00, 0x03, 0x21, 0x00 }; unsigned char *dmabuf = kmemdup(buf, sizeof(buf), GFP_KERNEL); if (!dmabuf) return -ENOMEM; ret = hid_hw_raw_request(hdev, dmabuf[0], dmabuf, sizeof(buf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(dmabuf); if (ret != sizeof(buf)) { hid_err(hdev, "Failed to start multitouch: %d\n", ret); return ret; } return 0; } static int elan_mute_led_set_brigtness(struct led_classdev *led_cdev, enum led_brightness value) { int ret; u8 led_state; struct device *dev = led_cdev->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct elan_drvdata *drvdata = hid_get_drvdata(hdev); unsigned char *dmabuf = kzalloc(ELAN_LED_REPORT_SIZE, GFP_KERNEL); if (!dmabuf) return -ENOMEM; led_state = !!value; dmabuf[0] = ELAN_MUTE_LED_REPORT; dmabuf[1] = 0x02; dmabuf[2] = led_state; ret = hid_hw_raw_request(hdev, dmabuf[0], dmabuf, ELAN_LED_REPORT_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(dmabuf); if (ret != ELAN_LED_REPORT_SIZE) { if (ret != -ENODEV) hid_err(hdev, "Failed to set mute led brightness: %d\n", ret); return ret < 0 ? ret : -EIO; } drvdata->mute_led_state = led_state; return 0; } static int elan_init_mute_led(struct hid_device *hdev) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); struct led_classdev *mute_led = &drvdata->mute_led; mute_led->name = "elan:red:mute"; mute_led->default_trigger = "audio-mute"; mute_led->brightness_set_blocking = elan_mute_led_set_brigtness; mute_led->max_brightness = LED_ON; mute_led->flags = LED_HW_PLUGGABLE; mute_led->dev = &hdev->dev; return devm_led_classdev_register(&hdev->dev, mute_led); } static int elan_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct elan_drvdata *drvdata; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "Hid Parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "Hid hw start failed\n"); return ret; } if (is_not_elan_touchpad(hdev)) return 0; if (!drvdata->input) { hid_err(hdev, "Input device is not registered\n"); ret = -ENAVAIL; goto err; } ret = elan_start_multitouch(hdev); if (ret) goto err; if (id->driver_data & ELAN_HAS_LED) { ret = elan_init_mute_led(hdev); if (ret) goto err; } return 0; err: hid_hw_stop(hdev); return ret; } static const struct hid_device_id elan_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2), .driver_data = ELAN_HAS_LED }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2_10_COVER), .driver_data = ELAN_HAS_LED }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_TOSHIBA_CLICK_L9W) }, { } }; MODULE_DEVICE_TABLE(hid, elan_devices); static struct hid_driver elan_driver = { .name = "elan", .id_table = elan_devices, .input_mapping = elan_input_mapping, .input_configured = elan_input_configured, .raw_event = elan_raw_event, .probe = elan_probe, }; module_hid_driver(elan_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alexandrov Stanislav"); MODULE_DESCRIPTION("Driver for HID ELAN Touchpads"); |
| 410 410 8 265 267 7 9 1 10 9 10 10 10 10 9 1 10 7 292 293 30 30 30 30 30 29 29 69 20 292 292 85 293 3 293 292 293 295 295 84 293 293 83 1 3 3 294 3 2 2 289 80 3 1 2 289 290 292 292 3 289 358 3 3 3 1 2 294 1 293 4 292 1 291 17 293 3 292 1 292 3 1 1 1 1 1 1 1 2 293 275 276 276 276 276 2 27 3 26 27 2 29 29 29 29 29 29 29 29 29 29 29 29 29 29 29 29 29 293 292 293 1 292 2 291 292 292 292 296 297 298 297 2 297 3 294 2 294 19 70 70 294 307 307 10 4 294 4 293 8 8 6 5 8 8 6 2 8 8 7 2 8 7 3 8 3 3 6 6 4 7 5 3 2 5 2 3 2 1 1 1 1 9 9 4 1 1 1 1 357 314 273 310 357 267 278 278 2 270 357 356 4 4 4 4 4 4 4 1 4 4 4 3 1 4 4 4 9 8 4 4 9 4 6 16 16 7 8 7 8 1 1 2 3 1 1 278 3 1 13 8 9 3 14 14 4 14 2 11 1 10 11 11 1 11 2 1 9 |