// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * 32bit -> 64bit ioctl wrapper for timer API
 * Copyright (c) by Takashi Iwai <tiwai@suse.de>
 */

/* This file included from timer.c */

#include <linux/compat.h>

/*
 * ILP32/LP64 has different size for 'long' type. Additionally, the size
 * of storage alignment differs depending on architectures. Here, '__packed'
 * qualifier is used so that the size of this structure is multiple of 4 and
 * it fits to any architectures with 32 bit storage alignment.
 */
struct snd_timer_gparams32 {
	struct snd_timer_id tid;
	u32 period_num;
	u32 period_den;
	unsigned char reserved[32];
} __packed;

struct snd_timer_info32 {
	u32 flags;
	s32 card;
	unsigned char id[64];
	unsigned char name[80];
	u32 reserved0;
	u32 resolution;
	unsigned char reserved[64];
};

static int snd_timer_user_gparams_compat(struct file *file,
					 struct snd_timer_gparams32 __user *user)
{
	struct snd_timer_gparams gparams;

	if (copy_from_user(&gparams.tid, &user->tid, sizeof(gparams.tid)) ||
	    get_user(gparams.period_num, &user->period_num) ||
	    get_user(gparams.period_den, &user->period_den))
		return -EFAULT;
	return timer_set_gparams(&gparams);
}

static int snd_timer_user_info_compat(struct file *file,
				      struct snd_timer_info32 __user *_info)
{
	struct snd_timer_user *tu;
	struct snd_timer_info32 info;
	struct snd_timer *t;

	tu = file->private_data;
	if (!tu->timeri)
		return -EBADFD;
	t = tu->timeri->timer;
	if (!t)
		return -EBADFD;
	memset(&info, 0, sizeof(info));
	info.card = t->card ? t->card->number : -1;
	if (t->hw.flags & SNDRV_TIMER_HW_SLAVE)
		info.flags |= SNDRV_TIMER_FLG_SLAVE;
	strscpy(info.id, t->id, sizeof(info.id));
	strscpy(info.name, t->name, sizeof(info.name));
	info.resolution = t->hw.resolution;
	if (copy_to_user(_info, &info, sizeof(*_info)))
		return -EFAULT;
	return 0;
}

enum {
	SNDRV_TIMER_IOCTL_GPARAMS32 = _IOW('T', 0x04, struct snd_timer_gparams32),
	SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32),
	SNDRV_TIMER_IOCTL_STATUS_COMPAT32 = _IOW('T', 0x14, struct snd_timer_status32),
	SNDRV_TIMER_IOCTL_STATUS_COMPAT64 = _IOW('T', 0x14, struct snd_timer_status64),
};

static long __snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd,
					  unsigned long arg)
{
	void __user *argp = compat_ptr(arg);

	switch (cmd) {
	case SNDRV_TIMER_IOCTL_PVERSION:
	case SNDRV_TIMER_IOCTL_TREAD_OLD:
	case SNDRV_TIMER_IOCTL_TREAD64:
	case SNDRV_TIMER_IOCTL_GINFO:
	case SNDRV_TIMER_IOCTL_GSTATUS:
	case SNDRV_TIMER_IOCTL_SELECT:
	case SNDRV_TIMER_IOCTL_PARAMS:
	case SNDRV_TIMER_IOCTL_START:
	case SNDRV_TIMER_IOCTL_START_OLD:
	case SNDRV_TIMER_IOCTL_STOP:
	case SNDRV_TIMER_IOCTL_STOP_OLD:
	case SNDRV_TIMER_IOCTL_CONTINUE:
	case SNDRV_TIMER_IOCTL_CONTINUE_OLD:
	case SNDRV_TIMER_IOCTL_PAUSE:
	case SNDRV_TIMER_IOCTL_PAUSE_OLD:
	case SNDRV_TIMER_IOCTL_NEXT_DEVICE:
		return __snd_timer_user_ioctl(file, cmd, (unsigned long)argp, true);
	case SNDRV_TIMER_IOCTL_GPARAMS32:
		return snd_timer_user_gparams_compat(file, argp);
	case SNDRV_TIMER_IOCTL_INFO32:
		return snd_timer_user_info_compat(file, argp);
	case SNDRV_TIMER_IOCTL_STATUS_COMPAT32:
		return snd_timer_user_status32(file, argp);
	case SNDRV_TIMER_IOCTL_STATUS_COMPAT64:
		return snd_timer_user_status64(file, argp);
	}
	return -ENOIOCTLCMD;
}

static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd,
					unsigned long arg)
{
	struct snd_timer_user *tu = file->private_data;

	guard(mutex)(&tu->ioctl_lock);
	return __snd_timer_user_ioctl_compat(file, cmd, arg);
}
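/*
 * Illustrative, standalone userspace sketch (not part of the file above):
 * how a 32-bit process on a 64-bit kernel would end up in the compat wrapper
 * for SNDRV_TIMER_IOCTL_GPARAMS. It assumes the ALSA UAPI header
 * <sound/asound.h> and the /dev/snd/timer node; the timer id values below
 * are placeholders, not meaningful settings.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sound/asound.h>

int main(void)
{
	struct snd_timer_gparams gparams = { 0 };
	int fd = open("/dev/snd/timer", O_RDWR);

	if (fd < 0)
		return 1;

	/* hypothetical id; real ids come from SNDRV_TIMER_IOCTL_NEXT_DEVICE */
	gparams.tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL;
	gparams.tid.device = 0;
	gparams.period_num = 1;
	gparams.period_den = 1000;	/* a 1/1000 s base period */

	/* compiled as 32-bit, this request is routed through the wrapper above */
	if (ioctl(fd, SNDRV_TIMER_IOCTL_GPARAMS, &gparams) < 0)
		perror("SNDRV_TIMER_IOCTL_GPARAMS");
	return 0;
}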
// SPDX-License-Identifier: GPL-2.0
/*
 * fs/sysfs/dir.c - sysfs core and dir operation implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * Please see Documentation/filesystems/sysfs.rst for more information.
 */

#define pr_fmt(fmt)	"sysfs: " fmt

#include <linux/fs.h>
#include <linux/kobject.h>
#include <linux/slab.h>
#include "sysfs.h"

DEFINE_SPINLOCK(sysfs_symlink_target_lock);

void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
	char *buf;

	buf = kzalloc(PATH_MAX, GFP_KERNEL);
	if (buf)
		kernfs_path(parent, buf, PATH_MAX);

	pr_warn("cannot create duplicate filename '%s/%s'\n", buf, name);
	dump_stack();

	kfree(buf);
}

/**
 * sysfs_create_dir_ns - create a directory for an object with a namespace tag
 * @kobj: object we're creating directory for
 * @ns: the namespace tag to use
 */
int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
{
	struct kernfs_node *parent, *kn;
	kuid_t uid;
	kgid_t gid;

	if (WARN_ON(!kobj))
		return -EINVAL;

	if (kobj->parent)
		parent = kobj->parent->sd;
	else
		parent = sysfs_root_kn;

	if (!parent)
		return -ENOENT;

	kobject_get_ownership(kobj, &uid, &gid);

	kn = kernfs_create_dir_ns(parent, kobject_name(kobj), 0755, uid, gid,
				  kobj, ns);
	if (IS_ERR(kn)) {
		if (PTR_ERR(kn) == -EEXIST)
			sysfs_warn_dup(parent, kobject_name(kobj));
		return PTR_ERR(kn);
	}

	kobj->sd = kn;
	return 0;
}

/**
 * sysfs_remove_dir - remove an object's directory.
 * @kobj: object.
 *
 * The only thing special about this is that we remove any files in
 * the directory before we remove the directory, and we've inlined
 * what used to be sysfs_rmdir() below, instead of calling separately.
 */
void sysfs_remove_dir(struct kobject *kobj)
{
	struct kernfs_node *kn = kobj->sd;

	/*
	 * In general, kobject owner is responsible for ensuring removal
	 * doesn't race with other operations and sysfs doesn't provide any
	 * protection; however, when @kobj is used as a symlink target, the
	 * symlinking entity usually doesn't own @kobj and thus has no
	 * control over removal. @kobj->sd may be removed anytime
	 * and symlink code may end up dereferencing an already freed node.
	 *
	 * sysfs_symlink_target_lock synchronizes @kobj->sd
	 * disassociation against symlink operations so that symlink code
	 * can safely dereference @kobj->sd.
	 */
	spin_lock(&sysfs_symlink_target_lock);
	kobj->sd = NULL;
	spin_unlock(&sysfs_symlink_target_lock);

	if (kn) {
		WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
		kernfs_remove(kn);
	}
}

int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
			const void *new_ns)
{
	struct kernfs_node *parent;
	int ret;

	parent = kernfs_get_parent(kobj->sd);
	ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
	kernfs_put(parent);
	return ret;
}

int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
		      const void *new_ns)
{
	struct kernfs_node *kn = kobj->sd;
	struct kernfs_node *new_parent;

	new_parent = new_parent_kobj && new_parent_kobj->sd ?
		new_parent_kobj->sd : sysfs_root_kn;

	return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
}

/**
 * sysfs_create_mount_point - create an always empty directory
 * @parent_kobj: kobject that will contain this always empty directory
 * @name: The name of the always empty directory to add
 */
int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name)
{
	struct kernfs_node *kn, *parent = parent_kobj->sd;

	kn = kernfs_create_empty_dir(parent, name);
	if (IS_ERR(kn)) {
		if (PTR_ERR(kn) == -EEXIST)
			sysfs_warn_dup(parent, name);
		return PTR_ERR(kn);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sysfs_create_mount_point);

/**
 * sysfs_remove_mount_point - remove an always empty directory.
 * @parent_kobj: kobject that will contain this always empty directory
 * @name: The name of the always empty directory to remove
 */
void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name)
{
	struct kernfs_node *parent = parent_kobj->sd;

	kernfs_remove_by_name_ns(parent, name, NULL);
}
EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
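/*
 * Illustrative sketch (not part of the file above): drivers normally do not
 * call sysfs_create_dir_ns() directly; it runs as part of kobject
 * registration. A minimal module body, assuming the exported kernel_kobj
 * (/sys/kernel) as the parent; the "example_dir" name is made up.
 */
#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/module.h>

static struct kobject *example_kobj;

static int __init example_init(void)
{
	/* kobject_add() -> create_dir() -> sysfs_create_dir_ns() */
	example_kobj = kobject_create_and_add("example_dir", kernel_kobj);
	return example_kobj ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	/* drops the last reference; the kobject release removes the directory */
	kobject_put(example_kobj);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");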
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) B.A.T.M.A.N. contributors:
 *
 * Marek Lindner, Simon Wunderlich
 */

#include "main.h"

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <net/genetlink.h>
#include <net/netlink.h>
#include <uapi/linux/batman_adv.h>

#include "bat_algo.h"
#include "netlink.h"

char batadv_routing_algo[20] = "BATMAN_IV";
static struct hlist_head batadv_algo_list;

/**
 * batadv_algo_init() - Initialize batman-adv algorithm management data
 *  structures
 */
void batadv_algo_init(void)
{
	INIT_HLIST_HEAD(&batadv_algo_list);
}

/**
 * batadv_algo_get() - Search for algorithm with specific name
 * @name: algorithm name to find
 *
 * Return: Pointer to batadv_algo_ops on success, NULL otherwise
 */
struct batadv_algo_ops *batadv_algo_get(const char *name)
{
	struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;

	hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
		if (strcmp(bat_algo_ops_tmp->name, name) != 0)
			continue;

		bat_algo_ops = bat_algo_ops_tmp;
		break;
	}

	return bat_algo_ops;
}

/**
 * batadv_algo_register() - Register callbacks for a mesh algorithm
 * @bat_algo_ops: mesh algorithm callbacks to add
 *
 * Return: 0 on success or negative error number in case of failure
 */
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
{
	struct batadv_algo_ops *bat_algo_ops_tmp;

	bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
	if (bat_algo_ops_tmp) {
		pr_info("Trying to register already registered routing algorithm: %s\n",
			bat_algo_ops->name);
		return -EEXIST;
	}

	/* all algorithms must implement all ops (for now) */
	if (!bat_algo_ops->iface.enable ||
	    !bat_algo_ops->iface.disable ||
	    !bat_algo_ops->iface.update_mac ||
	    !bat_algo_ops->iface.primary_set ||
	    !bat_algo_ops->neigh.cmp ||
	    !bat_algo_ops->neigh.is_similar_or_better) {
		pr_info("Routing algo '%s' does not implement required ops\n",
			bat_algo_ops->name);
		return -EINVAL;
	}

	INIT_HLIST_NODE(&bat_algo_ops->list);
	hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);

	return 0;
}

/**
 * batadv_algo_select() - Select algorithm of soft interface
 * @bat_priv: the bat priv with all the soft interface information
 * @name: name of the algorithm to select
 *
 * The algorithm callbacks for the soft interface will be set when the
 * algorithm with the correct name was found. Any previous selected algorithm
 * will not be deinitialized and the new selected algorithm will also not be
 * initialized. It is therefore not allowed to call batadv_algo_select outside
 * the creation function of the soft interface.
 *
 * Return: 0 on success or negative error number in case of failure
 */
int batadv_algo_select(struct batadv_priv *bat_priv, const char *name)
{
	struct batadv_algo_ops *bat_algo_ops;

	bat_algo_ops = batadv_algo_get(name);
	if (!bat_algo_ops)
		return -EINVAL;

	bat_priv->algo_ops = bat_algo_ops;

	return 0;
}

static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
{
	struct batadv_algo_ops *bat_algo_ops;
	char *algo_name = (char *)val;
	size_t name_len = strlen(algo_name);

	if (name_len > 0 && algo_name[name_len - 1] == '\n')
		algo_name[name_len - 1] = '\0';

	bat_algo_ops = batadv_algo_get(algo_name);
	if (!bat_algo_ops) {
		pr_err("Routing algorithm '%s' is not supported\n", algo_name);
		return -EINVAL;
	}

	return param_set_copystring(algo_name, kp);
}

static const struct kernel_param_ops batadv_param_ops_ra = {
	.set = batadv_param_set_ra,
	.get = param_get_string,
};

static struct kparam_string batadv_param_string_ra = {
	.maxlen = sizeof(batadv_routing_algo),
	.string = batadv_routing_algo,
};

module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
		0644);

/**
 * batadv_algo_dump_entry() - fill in information about one supported routing
 *  algorithm
 * @msg: netlink message to be sent back
 * @portid: Port to reply to
 * @seq: Sequence number of message
 * @bat_algo_ops: Algorithm to be dumped
 *
 * Return: Error number, or 0 on success
 */
static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
				  struct batadv_algo_ops *bat_algo_ops)
{
	void *hdr;

	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
			  NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return 0;

 nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/**
 * batadv_algo_dump() - fill in information about supported routing
 *  algorithms
 * @msg: netlink message to be sent back
 * @cb: Parameters to the netlink request
 *
 * Return: Length of reply message.
 */
int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
	int portid = NETLINK_CB(cb->skb).portid;
	struct batadv_algo_ops *bat_algo_ops;
	int skip = cb->args[0];
	int i = 0;

	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
		if (i++ < skip)
			continue;

		if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
					   bat_algo_ops)) {
			i--;
			break;
		}
	}

	cb->args[0] = i;

	return msg->len;
}
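/*
 * Illustrative sketch (not part of the file above): how a routing algorithm
 * would plug into batadv_algo_register() from its own init path, modelled on
 * what B.A.T.M.A.N. IV/V do. The "EXAMPLE" name and the stub bodies are made
 * up; the callback prototypes are sketched from struct batadv_algo_ops in
 * types.h and may differ between kernel versions.
 */
static int batadv_example_iface_enable(struct batadv_hard_iface *hard_iface)
{
	return 0;	/* set up per-interface state here */
}

static void batadv_example_iface_disable(struct batadv_hard_iface *hard_iface)
{
}

static void batadv_example_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
}

static void batadv_example_primary_iface_set(struct batadv_hard_iface *hard_iface)
{
}

static int batadv_example_neigh_cmp(struct batadv_neigh_node *neigh1,
				    struct batadv_hard_iface *if_outgoing1,
				    struct batadv_neigh_node *neigh2,
				    struct batadv_hard_iface *if_outgoing2)
{
	return 0;	/* compare link quality metrics here */
}

static bool batadv_example_neigh_is_sob(struct batadv_neigh_node *neigh1,
					struct batadv_hard_iface *if_outgoing1,
					struct batadv_neigh_node *neigh2,
					struct batadv_hard_iface *if_outgoing2)
{
	return true;
}

/* all six ops below are mandatory, see batadv_algo_register() above */
static struct batadv_algo_ops batadv_example_ops = {
	.name = "EXAMPLE",
	.iface = {
		.enable = batadv_example_iface_enable,
		.disable = batadv_example_iface_disable,
		.update_mac = batadv_example_iface_update_mac,
		.primary_set = batadv_example_primary_iface_set,
	},
	.neigh = {
		.cmp = batadv_example_neigh_cmp,
		.is_similar_or_better = batadv_example_neigh_is_sob,
	},
};

/* called from the module __init path, before any soft interface exists */
static int __init batadv_example_init(void)
{
	return batadv_algo_register(&batadv_example_ops);
}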
804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Internet Control Message Protocol (ICMPv6) * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on net/ipv4/icmp.c * * RFC 1885 */ /* * Changes: * * Andi Kleen : exception handling * Andi Kleen add rate limits. never reply to a icmp. * add more length checks and other fixes. * yoshfuji : ensure to sent parameter problem for * fragments. * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit. 
* Randy Dunlap and * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/netfilter.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <net/ip.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> #include <net/seg6.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> #include <net/l3mdev.h> #include <linux/uaccess.h> static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk); static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net_rcu(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) ping_err(skb, offset, ntohl(info)); return 0; } static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; /* Called with BH disabled */ static struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; sk = this_cpu_read(ipv6_icmp_sk); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ return NULL; } sock_net_set(sk, net); return sk; } static void icmpv6_xmit_unlock(struct sock *sk) { sock_net_set(sk, &init_net); spin_unlock(&sk->sk_lock.slock); } /* * Figure out, may we reply to this packet with icmp error. * * We do not reply, if: * - it was icmp error message. * - it is truncated, so that it is known, that protocol is ICMPV6 * (i.e. in the middle of some exthdr) * * --ANK (980726) */ static bool is_ineligible(const struct sk_buff *skb) { int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; int len = skb->len - ptr; __u8 nexthdr = ipv6_hdr(skb)->nexthdr; __be16 frag_off; if (len < 0) return true; ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); if (ptr < 0) return false; if (nexthdr == IPPROTO_ICMPV6) { u8 _type, *tp; tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); /* Based on RFC 8200, Section 4.5 Fragment Header, return * false if this is a fragment packet with no icmp header info. */ if (!tp && frag_off != 0) return false; else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; } static bool icmpv6_mask_allow(struct net *net, int type) { if (type > ICMPV6_MSG_MAX) return true; /* Limit if icmp type is set in ratemask. 
*/ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask)) return true; return false; } static bool icmpv6_global_allow(struct net *net, int type, bool *apply_ratelimit) { if (icmpv6_mask_allow(net, type)) return true; if (icmp_global_allow(net)) { *apply_ratelimit = true; return true; } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } /* * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; if (!apply_ratelimit) return true; /* * Look up the output route. * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = true; } else { struct rt6_info *rt = dst_rt6_info(dst); int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); rcu_read_unlock(); } if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), ICMP6_MIB_RATELIMITHOST); else icmp_global_consume(net); dst_release(dst); return res; } static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, struct flowi6 *fl6) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; dst = ip6_route_output(net, sk, fl6); if (!dst->error) { struct rt6_info *rt = dst_rt6_info(dst); struct in6_addr prefsrc; rt6_get_prefsrc(rt, &prefsrc); res = !ipv6_addr_any(&prefsrc); } dst_release(dst); return res; } /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an * unrecognized IPv6 option that has the Option Type * highest-order two bits set to 10 */ static bool opt_unrec(struct sk_buff *skb, __u32 offset) { u8 _optval, *op; offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); if (!op) return true; return (*op & 0xC0) == 0x80; } void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; skb = skb_peek(&sk->sk_write_queue); if (!skb) return; icmp6h = icmp6_hdr(skb); memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); } tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); } struct icmpv6_msg { struct sk_buff *skb; int offset; uint8_t type; }; static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct icmpv6_msg *msg = (struct icmpv6_msg *) from; struct sk_buff *org_skb = msg->skb; __wsum csum; csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, to, len); skb->csum = csum_block_add(skb->csum, csum, odd); if (!(msg->type & ICMPV6_INFOMSG_MASK)) nf_ct_attach(skb, org_skb); 
return 0; } #if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) { struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hao *hao; int off; if (opt->dsthao) { off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); if (likely(off >= 0)) { hao = (struct ipv6_destopt_hao *) (skb_network_header(skb) + off); swap(iph->saddr, hao->addr); } } } #else static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; int err; err = ip6_dst_lookup(net, sk, &dst, fl6); if (err) return ERR_PTR(err); /* * We won't send icmp if the destination is known * anycast unless we need to treat anycast as unicast. */ if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) && ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } /* No need to clone since we're just using its address. */ dst2 = dst; dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); if (!IS_ERR(dst)) { if (dst != dst2) return dst; } else { if (PTR_ERR(dst) == -EPERM) dst = NULL; else return dst; } err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; err = ip6_dst_lookup(net, sk, &dst2, &fl2); if (err) goto relookup_failed; dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; } else { err = PTR_ERR(dst2); if (err == -EPERM) { dst_release(dst); return dst2; } else goto relookup_failed; } relookup_failed: if (dst) return dst; return ERR_PTR(err); } static struct net_device *icmp6_dev(const struct sk_buff *skb) { struct net_device *dev = skb->dev; /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so * get the real device index. Same is needed for replies to a link * local address on a device enslaved to an L3 master device */ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), * and ip6_null_entry could be set to skb if no route is found. */ if (rt6 && rt6->rt6i_idev) dev = rt6->rt6i_idev->dev; } return dev; } static int icmp6_iif(const struct sk_buff *skb) { return icmp6_dev(skb)->ifindex; } /* * Send an ICMP message in response to a packet in error */ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; bool apply_ratelimit = false; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct ipcm6_cookie ipc6; int iif = 0; int addr_type = 0; int len; u32 mark; if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; if (!skb->dev) return; rcu_read_lock(); net = dev_net_rcu(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) * Rule (e.1) is enforced by not using icmp6_send * in any code that processes icmp errors. 
*/ addr_type = ipv6_addr_type(&hdr->daddr); if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) saddr = &hdr->daddr; /* * Dest addr check */ if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) goto out; saddr = NULL; } addr_type = ipv6_addr_type(&hdr->saddr); /* * Source addr check */ if (__ipv6_addr_needs_scope_id(addr_type)) { iif = icmp6_iif(skb); } else { /* * The source device is used for looking up which routing table * to use for sending an ICMP error. */ iif = l3mdev_master_ifindex(skb->dev); } /* * Must not send error if the source does not uniquely * identify a single node (RFC2463 Section 2.4). * We check unspecified / multicast addresses here, * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out; } /* * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out; } /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type, &apply_ratelimit)) goto out_bh_enable; mip6_addr_swap(skb, parm); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = hdr->saddr; if (force_saddr) saddr = force_saddr; if (saddr) { fl6.saddr = *saddr; } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) { /* select a more meaningful saddr from input if */ struct net_device *in_netdev; in_netdev = dev_get_by_index(net, parm->iif); if (in_netdev) { ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, inet6_sk(sk)->srcprefs, &fl6.saddr); dev_put(in_netdev); } } fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) goto out_unlock; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out_unlock; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); msg.type = type; len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out_dst_release; } idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { 
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } out_dst_release: dst_release(dst); out_unlock: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); out: rcu_read_unlock(); } EXPORT_SYMBOL(icmp6_send); /* Slightly more convenient version of icmp6_send with drop reasons. */ void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, enum skb_drop_reason reason) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); kfree_skb_reason(skb, reason); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH * if sufficient data bytes are available * @nhs is the size of the tunnel header(s) : * Either an IPv4 header for SIT encap * an IPv4 header + GRE header for GRE encap */ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len) { struct in6_addr temp_saddr; struct rt6_info *rt; struct sk_buff *skb2; u32 info = 0; if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) return 1; /* RFC 4884 (partial) support for ICMP extensions */ if (data_len < 128 || (data_len & 7) || skb->len < data_len) data_len = 0; skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); if (!skb2) return 1; skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2); rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); if (data_len) { /* RFC 4884 (partial) support : * insert 0 padding at the end, before the extensions */ __skb_push(skb2, nhs); skb_reset_network_header(skb2); memmove(skb2->data, skb2->data + nhs, data_len - nhs); memset(skb2->data + data_len - nhs, 0, nhs); /* RFC 4884 4.5 : Length is measured in 64-bit words, * and stored in reserved[0] */ info = (data_len/8) << 24; } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, info, &temp_saddr, IP6CB(skb2)); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, info, &temp_saddr, IP6CB(skb2)); if (rt) ip6_rt_put(rt); kfree_skb(skb2); return 0; } EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); bool apply_ratelimit = false; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); SKB_DR(reason); bool acast; u8 type; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && net->ipv6.sysctl.icmpv6_echo_ignore_multicast) return reason; saddr = &ipv6_hdr(skb)->daddr; acast = ipv6_anycast_destination(skb_dst(skb), saddr); if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast) return reason; if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) type = ICMPV6_EXT_ECHO_REPLY; else type = ICMPV6_ECHO_REPLY; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = type; memset(&fl6, 0, sizeof(fl6)); if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES) fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_oif = icmp6_iif(skb); fl6.fl6_icmp_type = type; fl6.flowi6_mark = mark; 
fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); local_bh_disable(); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) goto out; /* Check the ratelimit */ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) || !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit)) goto out_dst_release; idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; msg.type = type; ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr)) goto out_dst_release; if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); reason = SKB_CONSUMED; } out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); return reason; } enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net_rcu(skb->dev); const struct inet6_protocol *ipprot; enum skb_drop_reason reason; int inner_offset; __be16 frag_off; u8 nexthdr; reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr)); if (reason != SKB_NOT_DROPPED_YET) goto out; seg6_icmp_srh(skb, opt); nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (inner_offset < 0) { SKB_DR_SET(reason, IPV6_BAD_EXTHDR); goto out; } } else { inner_offset = sizeof(struct ipv6hdr); } /* Checkin header including 8 bytes of inner protocol header. */ reason = pskb_may_pull_reason(skb, inner_offset + 8); if (reason != SKB_NOT_DROPPED_YET) goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed pmtu discovery. Corresponding argument (opt) to notifiers is already added. 
--ANK (980726) */ ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, opt, type, code, inner_offset, info); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); return SKB_CONSUMED; out: __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return reason; } /* * Handle icmp messages */ static int icmpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) { reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; } if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) { reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; } skb_set_network_header(skb, nh); } __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) goto discard_it; hdr = icmp6_hdr(skb); type = hdr->icmp6_type; ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) reason = icmpv6_echo_reply(skb); break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) reason = icmpv6_echo_reply(skb); break; case ICMPV6_ECHO_REPLY: reason = ping_rcv(skb); break; case ICMPV6_EXT_ECHO_REPLY: reason = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update standard destination cache. Seems, only "advanced" destination cache will allow to solve this problem --ANK (980726) */ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); /* to notify */ fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: reason = icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); break; case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: reason = ndisc_rcv(skb); break; case ICMPV6_MGM_QUERY: igmp6_event_query(skb); return 0; case ICMPV6_MGM_REPORT: igmp6_event_report(skb); return 0; case ICMPV6_MGM_REDUCTION: case ICMPV6_NI_QUERY: case ICMPV6_NI_REPLY: case ICMPV6_MLD2_REPORT: case ICMPV6_DHAAD_REQUEST: case ICMPV6_DHAAD_REPLY: case ICMPV6_MOBILE_PREFIX_SOL: case ICMPV6_MOBILE_PREFIX_ADV: break; default: /* informational */ if (type & ICMPV6_INFOMSG_MASK) break; net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", saddr, daddr); /* * error of unknown type. 
* must pass to upper level */ reason = icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ if (reason) kfree_skb_reason(skb, reason); else consume_skb(skb); return 0; csum_error: reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb_reason(skb, reason); return 0; } void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { memset(fl6, 0, sizeof(*fl6)); fl6->saddr = *saddr; fl6->daddr = *daddr; fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } int __init icmpv6_init(void) { struct sock *sk; int err, i; for_each_possible_cpu(i) { err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &init_net); if (err < 0) { pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); return err; } per_cpu(ipv6_icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; err = inet6_register_icmp_sender(icmp6_send); if (err) goto sender_reg_err; return 0; sender_reg_err: inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); return err; } void icmpv6_cleanup(void) { inet6_unregister_icmp_sender(icmp6_send); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } static const struct icmp6_err { int err; int fatal; } tab_unreach[] = { { /* NOROUTE */ .err = ENETUNREACH, .fatal = 0, }, { /* ADM_PROHIBITED */ .err = EACCES, .fatal = 1, }, { /* Was NOT_NEIGHBOUR, now reserved */ .err = EHOSTUNREACH, .fatal = 0, }, { /* ADDR_UNREACH */ .err = EHOSTUNREACH, .fatal = 0, }, { /* PORT_UNREACH */ .err = ECONNREFUSED, .fatal = 1, }, { /* POLICY_FAIL */ .err = EACCES, .fatal = 1, }, { /* REJECT_ROUTE */ .err = EACCES, .fatal = 1, }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; *err = EPROTO; switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } break; case ICMPV6_PKT_TOOBIG: *err = EMSGSIZE; break; case ICMPV6_PARAMPROB: *err = EPROTO; fatal = 1; break; case ICMPV6_TIME_EXCEED: *err = EHOSTUNREACH; break; } return fatal; } EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "echo_ignore_all", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_multicast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_anycast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "ratemask", .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr, 
.maxlen = ICMPV6_MSG_MAX + 1, .mode = 0644, .proc_handler = proc_do_large_bitmap, }, { .procname = "error_anycast_as_unicast", .data = &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) { struct ctl_table *table; table = kmemdup(ipv6_icmp_table_template, sizeof(ipv6_icmp_table_template), GFP_KERNEL); if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast; table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast; } return table; } size_t ipv6_icmp_sysctl_table_size(void) { return ARRAY_SIZE(ipv6_icmp_table_template); } #endif |
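/*
 * Illustrative, standalone arithmetic sketch (not part of the file above):
 * the prefix-length scaling applied by icmpv6_xrlim_allow() before handing a
 * token-bucket timeout to inet_peer_xrlim_allow(). The base value stands in
 * for net->ipv6.sysctl.icmpv6_time; the numbers are only for demonstration.
 */
#include <stdio.h>

static unsigned int icmpv6_scaled_tmo(unsigned int tmo, int plen)
{
	/* give more bandwidth to wider prefixes, as in icmpv6_xrlim_allow() */
	if (plen < 128)
		tmo >>= ((128 - plen) >> 5);
	return tmo;
}

int main(void)
{
	int plens[] = { 128, 64, 48, 0 };
	unsigned int base = 1000;	/* pretend the sysctl is 1000 jiffies */

	for (int i = 0; i < 4; i++)
		printf("plen %3d -> tmo %u\n", plens[i],
		       icmpv6_scaled_tmo(base, plens[i]));
	/*
	 * plen 128 -> 1000 (host route: full delay between errors)
	 * plen  64 -> 250  (shift by (128 - 64) >> 5 == 2)
	 * plen  48 -> 250  (shift by 80 >> 5 == 2)
	 * plen   0 -> 62   (shift by 128 >> 5 == 4)
	 */
	return 0;
}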
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Oracle Corporation
 */
#include <linux/slab.h>
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <linux/sched/vhost_task.h>
#include <linux/sched/signal.h>

enum vhost_task_flags {
	VHOST_TASK_FLAGS_STOP,
	VHOST_TASK_FLAGS_KILLED,
};

struct vhost_task {
	bool (*fn)(void *data);
	void (*handle_sigkill)(void *data);
	void *data;
	struct completion exited;
	unsigned long flags;
	struct task_struct *task;
	/* serialize SIGKILL and vhost_task_stop calls */
	struct mutex exit_mutex;
};

static int vhost_task_fn(void *data)
{
	struct vhost_task *vtsk = data;

	for (;;) {
		bool did_work;

		if (signal_pending(current)) {
			struct ksignal ksig;

			if (get_signal(&ksig))
				break;
		}

		/* mb paired w/ vhost_task_stop */
		set_current_state(TASK_INTERRUPTIBLE);

		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
			__set_current_state(TASK_RUNNING);
			break;
		}

		did_work = vtsk->fn(vtsk->data);
		if (!did_work)
			schedule();
	}

	mutex_lock(&vtsk->exit_mutex);
	/*
	 * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
	 * When the vhost layer has called vhost_task_stop it's already stopped
	 * new work and flushed.
	 */
	if (!test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
		set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
		vtsk->handle_sigkill(vtsk->data);
	}
	mutex_unlock(&vtsk->exit_mutex);
	complete(&vtsk->exited);

	do_exit(0);
}

/**
 * vhost_task_wake - wakeup the vhost_task
 * @vtsk: vhost_task to wake
 *
 * wake up the vhost_task worker thread
 */
void vhost_task_wake(struct vhost_task *vtsk)
{
	wake_up_process(vtsk->task);
}
EXPORT_SYMBOL_GPL(vhost_task_wake);

/**
 * vhost_task_stop - stop a vhost_task
 * @vtsk: vhost_task to stop
 *
 * vhost_task_fn ensures the worker thread exits after
 * VHOST_TASK_FLAGS_STOP becomes true.
 */
void vhost_task_stop(struct vhost_task *vtsk)
{
	mutex_lock(&vtsk->exit_mutex);
	if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) {
		set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
		vhost_task_wake(vtsk);
	}
	mutex_unlock(&vtsk->exit_mutex);

	/*
	 * Make sure vhost_task_fn is no longer accessing the vhost_task before
	 * freeing it below.
	 */
	wait_for_completion(&vtsk->exited);
	kfree(vtsk);
}
EXPORT_SYMBOL_GPL(vhost_task_stop);

/**
 * vhost_task_create - create a copy of a task to be used by the kernel
 * @fn: vhost worker function
 * @handle_sigkill: vhost function to handle when we are killed
 * @arg: data to be passed to fn and handle_sigkill
 * @name: the thread's name
 *
 * This returns a specialized task for use by the vhost layer or NULL on
 * failure. The returned task is inactive, and the caller must fire it up
 * through vhost_task_start().
 */
struct vhost_task *vhost_task_create(bool (*fn)(void *),
				     void (*handle_sigkill)(void *), void *arg,
				     const char *name)
{
	struct kernel_clone_args args = {
		.flags		= CLONE_FS | CLONE_UNTRACED | CLONE_VM |
				  CLONE_THREAD | CLONE_SIGHAND,
		.exit_signal	= 0,
		.fn		= vhost_task_fn,
		.name		= name,
		.user_worker	= 1,
		.no_files	= 1,
	};
	struct vhost_task *vtsk;
	struct task_struct *tsk;

	vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
	if (!vtsk)
		return NULL;
	init_completion(&vtsk->exited);
	mutex_init(&vtsk->exit_mutex);
	vtsk->data = arg;
	vtsk->fn = fn;
	vtsk->handle_sigkill = handle_sigkill;

	args.fn_arg = vtsk;

	tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args);
	if (IS_ERR(tsk)) {
		kfree(vtsk);
		return NULL;
	}

	vtsk->task = tsk;
	return vtsk;
}
EXPORT_SYMBOL_GPL(vhost_task_create);

/**
 * vhost_task_start - start a vhost_task created with vhost_task_create
 * @vtsk: vhost_task to wake up
 */
void vhost_task_start(struct vhost_task *vtsk)
{
	wake_up_new_task(vtsk->task);
}
EXPORT_SYMBOL_GPL(vhost_task_start);
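/*
 * Illustrative sketch (not part of the file above): the intended
 * create/start/wake/stop lifecycle of a vhost_task, as a driver in the vhost
 * layer would use it. The "example_worker" type and helper names are made up;
 * only the vhost_task_* calls come from the API above.
 */
#include <linux/errno.h>
#include <linux/sched/vhost_task.h>

struct example_worker {
	struct vhost_task *vtsk;
	/* work queued by the driver would live here */
};

/* return true if work was done, so vhost_task_fn() loops without sleeping */
static bool example_fn(void *data)
{
	/* struct example_worker *w = data; ... drain w's work queue here ... */
	return false;	/* nothing left: let the task schedule() */
}

/* called if the owner is SIGKILLed; stop queueing new work */
static void example_handle_sigkill(void *data)
{
}

static int example_worker_setup(struct example_worker *w)
{
	w->vtsk = vhost_task_create(example_fn, example_handle_sigkill, w,
				    "vhost-example");
	if (!w->vtsk)
		return -ENOMEM;
	vhost_task_start(w->vtsk);
	return 0;
}

static void example_worker_kick(struct example_worker *w)
{
	/* after queueing new work: */
	vhost_task_wake(w->vtsk);
}

static void example_worker_teardown(struct example_worker *w)
{
	/* flush pending work first, then: */
	vhost_task_stop(w->vtsk);	/* also frees w->vtsk */
}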
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 | // SPDX-License-Identifier: GPL-2.0 /* * buffered writeback throttling. loosely based on CoDel. We can't drop * packets for IO scheduling, so the logic is something like this: * * - Monitor latencies in a defined window of time. * - If the minimum latency in the above window exceeds some target, increment * scaling step and scale down queue depth by a factor of 2x. The monitoring * window is then shrunk to 100 / sqrt(scaling step + 1). * - For any window where we don't have solid data on what the latencies * look like, retain status quo. * - If latencies look good, decrement scaling step. * - If we're only doing writes, allow the scaling step to go negative. This * will temporarily boost write performance, snapping back to a stable * scaling step of 0 if reads show up or the heavy writers finish. Unlike * positive scaling steps where we shrink the monitoring window, a negative * scaling step retains the default step==0 window size. * * Copyright (C) 2016 Jens Axboe * */ #include <linux/kernel.h> #include <linux/blk_types.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/swap.h> #include "blk-stat.h" #include "blk-wbt.h" #include "blk-rq-qos.h" #include "elevator.h" #include "blk.h" #define CREATE_TRACE_POINTS #include <trace/events/wbt.h> enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ WBT_READ = 2, /* read */ WBT_SWAP = 4, /* write, from swap_writepage() */ WBT_DISCARD = 8, /* discard */ WBT_NR_BITS = 4, /* number of bits */ }; enum { WBT_RWQ_BG = 0, WBT_RWQ_SWAP, WBT_RWQ_DISCARD, WBT_NUM_RWQ, }; /* * If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other * state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered * to WBT_STATE_OFF/ON_MANUAL. */ enum { WBT_STATE_ON_DEFAULT = 1, /* on by default */ WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */ WBT_STATE_OFF_DEFAULT = 3, /* off by default */ WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */ }; struct rq_wb { /* * Settings that govern how we throttle */ unsigned int wb_background; /* background writeback */ unsigned int wb_normal; /* normal writeback */ short enable_state; /* WBT_STATE_* */ /* * Number of consecutive periods where we don't have enough * information to make a firm scale up/down decision. 
*/ unsigned int unknown_cnt; u64 win_nsec; /* default window size */ u64 cur_win_nsec; /* current window size */ struct blk_stat_callback *cb; u64 sync_issue; void *sync_cookie; unsigned long last_issue; /* last non-throttled issue */ unsigned long last_comp; /* last non-throttled comp */ unsigned long min_lat_nsec; struct rq_qos rqos; struct rq_wait rq_wait[WBT_NUM_RWQ]; struct rq_depth rq_depth; }; static inline struct rq_wb *RQWB(struct rq_qos *rqos) { return container_of(rqos, struct rq_wb, rqos); } static inline void wbt_clear_state(struct request *rq) { rq->wbt_flags = 0; } static inline enum wbt_flags wbt_flags(struct request *rq) { return rq->wbt_flags; } static inline bool wbt_is_tracked(struct request *rq) { return rq->wbt_flags & WBT_TRACKED; } static inline bool wbt_is_read(struct request *rq) { return rq->wbt_flags & WBT_READ; } enum { /* * Default setting, we'll scale up (to 75% of QD max) or down (min 1) * from here depending on device stats */ RWB_DEF_DEPTH = 16, /* * 100msec window */ RWB_WINDOW_NSEC = 100 * 1000 * 1000ULL, /* * Disregard stats, if we don't meet this minimum */ RWB_MIN_WRITE_SAMPLES = 3, /* * If we have this number of consecutive windows with not enough * information to scale up or down, scale up. */ RWB_UNKNOWN_BUMP = 5, }; static inline bool rwb_enabled(struct rq_wb *rwb) { return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT && rwb->enable_state != WBT_STATE_OFF_MANUAL; } static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) { if (rwb_enabled(rwb)) { const unsigned long cur = jiffies; if (cur != *var) *var = cur; } } /* * If a task was rate throttled in balance_dirty_pages() within the last * second or so, use that to indicate a higher cleaning rate. */ static bool wb_recent_wait(struct rq_wb *rwb) { struct backing_dev_info *bdi = rwb->rqos.disk->bdi; return time_before(jiffies, bdi->last_bdp_sleep + HZ); } static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, enum wbt_flags wb_acct) { if (wb_acct & WBT_SWAP) return &rwb->rq_wait[WBT_RWQ_SWAP]; else if (wb_acct & WBT_DISCARD) return &rwb->rq_wait[WBT_RWQ_DISCARD]; return &rwb->rq_wait[WBT_RWQ_BG]; } static void rwb_wake_all(struct rq_wb *rwb) { int i; for (i = 0; i < WBT_NUM_RWQ; i++) { struct rq_wait *rqw = &rwb->rq_wait[i]; if (wq_has_sleeper(&rqw->wait)) wake_up_all(&rqw->wait); } } static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, enum wbt_flags wb_acct) { int inflight, limit; inflight = atomic_dec_return(&rqw->inflight); /* * For discards, our limit is always the background. For writes, if * the device does write back caching, drop further down before we * wake people up. */ if (wb_acct & WBT_DISCARD) limit = rwb->wb_background; else if (blk_queue_write_cache(rwb->rqos.disk->queue) && !wb_recent_wait(rwb)) limit = 0; else limit = rwb->wb_normal; /* * Don't wake anyone up if we are above the normal limit. */ if (inflight && inflight >= limit) return; if (wq_has_sleeper(&rqw->wait)) { int diff = limit - inflight; if (!inflight || diff >= rwb->wb_background / 2) wake_up_all(&rqw->wait); } } static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) { struct rq_wb *rwb = RQWB(rqos); struct rq_wait *rqw; if (!(wb_acct & WBT_TRACKED)) return; rqw = get_rq_wait(rwb, wb_acct); wbt_rqw_done(rwb, rqw, wb_acct); } /* * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. 
*/ static void wbt_done(struct rq_qos *rqos, struct request *rq) { struct rq_wb *rwb = RQWB(rqos); if (!wbt_is_tracked(rq)) { if (rwb->sync_cookie == rq) { rwb->sync_issue = 0; rwb->sync_cookie = NULL; } if (wbt_is_read(rq)) wb_timestamp(rwb, &rwb->last_comp); } else { WARN_ON_ONCE(rq == rwb->sync_cookie); __wbt_done(rqos, wbt_flags(rq)); } wbt_clear_state(rq); } static inline bool stat_sample_valid(struct blk_rq_stat *stat) { /* * We need at least one read sample, and a minimum of * RWB_MIN_WRITE_SAMPLES. We require some write samples to know * that it's writes impacting us, and not just some sole read on * a device that is in a lower power state. */ return (stat[READ].nr_samples >= 1 && stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES); } static u64 rwb_sync_issue_lat(struct rq_wb *rwb) { u64 issue = READ_ONCE(rwb->sync_issue); if (!issue || !rwb->sync_cookie) return 0; return blk_time_get_ns() - issue; } static inline unsigned int wbt_inflight(struct rq_wb *rwb) { unsigned int i, ret = 0; for (i = 0; i < WBT_NUM_RWQ; i++) ret += atomic_read(&rwb->rq_wait[i].inflight); return ret; } enum { LAT_OK = 1, LAT_UNKNOWN, LAT_UNKNOWN_WRITES, LAT_EXCEEDED, }; static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) { struct backing_dev_info *bdi = rwb->rqos.disk->bdi; struct rq_depth *rqd = &rwb->rq_depth; u64 thislat; /* * If our stored sync issue exceeds the window size, or it * exceeds our min target AND we haven't logged any entries, * flag the latency as exceeded. wbt works off completion latencies, * but for a flooded device, a single sync IO can take a long time * to complete after being issued. If this time exceeds our * monitoring window AND we didn't see any other completions in that * window, then count that sync IO as a violation of the latency. */ thislat = rwb_sync_issue_lat(rwb); if (thislat > rwb->cur_win_nsec || (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) { trace_wbt_lat(bdi, thislat); return LAT_EXCEEDED; } /* * No read/write mix, if stat isn't valid */ if (!stat_sample_valid(stat)) { /* * If we had writes in this stat window and the window is * current, we're only doing writes. If a task recently * waited or still has writes in flights, consider us doing * just writes as well. */ if (stat[WRITE].nr_samples || wb_recent_wait(rwb) || wbt_inflight(rwb)) return LAT_UNKNOWN_WRITES; return LAT_UNKNOWN; } /* * If the 'min' latency exceeds our target, step down. 
*/ if (stat[READ].min > rwb->min_lat_nsec) { trace_wbt_lat(bdi, stat[READ].min); trace_wbt_stat(bdi, stat); return LAT_EXCEEDED; } if (rqd->scale_step) trace_wbt_stat(bdi, stat); return LAT_OK; } static void rwb_trace_step(struct rq_wb *rwb, const char *msg) { struct backing_dev_info *bdi = rwb->rqos.disk->bdi; struct rq_depth *rqd = &rwb->rq_depth; trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, rwb->wb_background, rwb->wb_normal, rqd->max_depth); } static void calc_wb_limits(struct rq_wb *rwb) { if (rwb->min_lat_nsec == 0) { rwb->wb_normal = rwb->wb_background = 0; } else if (rwb->rq_depth.max_depth <= 2) { rwb->wb_normal = rwb->rq_depth.max_depth; rwb->wb_background = 1; } else { rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; } } static void scale_up(struct rq_wb *rwb) { if (!rq_depth_scale_up(&rwb->rq_depth)) return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_wake_all(rwb); rwb_trace_step(rwb, tracepoint_string("scale up")); } static void scale_down(struct rq_wb *rwb, bool hard_throttle) { if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_trace_step(rwb, tracepoint_string("scale down")); } static void rwb_arm_timer(struct rq_wb *rwb) { struct rq_depth *rqd = &rwb->rq_depth; if (rqd->scale_step > 0) { /* * We should speed this up, using some variant of a fast * integer inverse square root calculation. Since we only do * this for every window expiration, it's not a huge deal, * though. */ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, int_sqrt((rqd->scale_step + 1) << 8)); } else { /* * For step < 0, we don't want to increase/decrease the * window size. */ rwb->cur_win_nsec = rwb->win_nsec; } blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec); } static void wb_timer_fn(struct blk_stat_callback *cb) { struct rq_wb *rwb = cb->data; struct rq_depth *rqd = &rwb->rq_depth; unsigned int inflight = wbt_inflight(rwb); int status; if (!rwb->rqos.disk) return; status = latency_exceeded(rwb, cb->stat); trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight); /* * If we exceeded the latency target, step down. If we did not, * step one level up. If we don't know enough to say either exceeded * or ok, then don't do anything. */ switch (status) { case LAT_EXCEEDED: scale_down(rwb, true); break; case LAT_OK: scale_up(rwb); break; case LAT_UNKNOWN_WRITES: /* * We started a the center step, but don't have a valid * read/write sample, but we do have writes going on. * Allow step to go negative, to increase write perf. */ scale_up(rwb); break; case LAT_UNKNOWN: if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP) break; /* * We get here when previously scaled reduced depth, and we * currently don't have a valid read/write sample. For that * case, slowly return to center state (step == 0). 
*/ if (rqd->scale_step > 0) scale_up(rwb); else if (rqd->scale_step < 0) scale_down(rwb, false); break; default: break; } /* * Re-arm timer, if we have IO in flight */ if (rqd->scale_step || inflight) rwb_arm_timer(rwb); } static void wbt_update_limits(struct rq_wb *rwb) { struct rq_depth *rqd = &rwb->rq_depth; rqd->scale_step = 0; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); calc_wb_limits(rwb); rwb_wake_all(rwb); } bool wbt_disabled(struct request_queue *q) { struct rq_qos *rqos = wbt_rq_qos(q); return !rqos || !rwb_enabled(RQWB(rqos)); } u64 wbt_get_min_lat(struct request_queue *q) { struct rq_qos *rqos = wbt_rq_qos(q); if (!rqos) return 0; return RQWB(rqos)->min_lat_nsec; } void wbt_set_min_lat(struct request_queue *q, u64 val) { struct rq_qos *rqos = wbt_rq_qos(q); if (!rqos) return; RQWB(rqos)->min_lat_nsec = val; if (val) RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; else RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL; wbt_update_limits(RQWB(rqos)); } static bool close_io(struct rq_wb *rwb) { const unsigned long now = jiffies; return time_before(now, rwb->last_issue + HZ / 10) || time_before(now, rwb->last_comp + HZ / 10); } #define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP) static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf) { unsigned int limit; if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD) return rwb->wb_background; /* * At this point we know it's a buffered write. If this is * swap trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing * IO for a bit. */ if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb)) limit = rwb->rq_depth.max_depth; else if ((opf & REQ_BACKGROUND) || close_io(rwb)) { /* * If less than 100ms since we completed unrelated IO, * limit us to half the depth for background writeback. */ limit = rwb->wb_background; } else limit = rwb->wb_normal; return limit; } struct wbt_wait_data { struct rq_wb *rwb; enum wbt_flags wb_acct; blk_opf_t opf; }; static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data) { struct wbt_wait_data *data = private_data; return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf)); } static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data) { struct wbt_wait_data *data = private_data; wbt_rqw_done(data->rwb, rqw, data->wb_acct); } /* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. 
*/ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, blk_opf_t opf) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); struct wbt_wait_data data = { .rwb = rwb, .wb_acct = wb_acct, .opf = opf, }; rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb); } static inline bool wbt_should_throttle(struct bio *bio) { switch (bio_op(bio)) { case REQ_OP_WRITE: /* * Don't throttle WRITE_ODIRECT */ if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE)) return false; fallthrough; case REQ_OP_DISCARD: return true; default: return false; } } static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio) { enum wbt_flags flags = 0; if (!rwb_enabled(rwb)) return 0; if (bio_op(bio) == REQ_OP_READ) { flags = WBT_READ; } else if (wbt_should_throttle(bio)) { if (bio->bi_opf & REQ_SWAP) flags |= WBT_SWAP; if (bio_op(bio) == REQ_OP_DISCARD) flags |= WBT_DISCARD; flags |= WBT_TRACKED; } return flags; } static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio) { struct rq_wb *rwb = RQWB(rqos); enum wbt_flags flags = bio_to_wbt_flags(rwb, bio); __wbt_done(rqos, flags); } /* * May sleep, if we have exceeded the writeback limits. Caller can pass * in an irq held spinlock, if it holds one when calling this function. * If we do sleep, we'll release and re-grab it. */ static void wbt_wait(struct rq_qos *rqos, struct bio *bio) { struct rq_wb *rwb = RQWB(rqos); enum wbt_flags flags; flags = bio_to_wbt_flags(rwb, bio); if (!(flags & WBT_TRACKED)) { if (flags & WBT_READ) wb_timestamp(rwb, &rwb->last_issue); return; } __wbt_wait(rwb, flags, bio->bi_opf); if (!blk_stat_is_active(rwb->cb)) rwb_arm_timer(rwb); } static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct rq_wb *rwb = RQWB(rqos); rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); } static void wbt_issue(struct rq_qos *rqos, struct request *rq) { struct rq_wb *rwb = RQWB(rqos); if (!rwb_enabled(rwb)) return; /* * Track sync issue, in case it takes a long time to complete. Allows us * to react quicker, if a sync IO takes a long time to complete. Note * that this is just a hint. The request can go away when it completes, * so it's important we never dereference it. We only use the address to * compare with, which is why we store the sync_issue time locally. */ if (wbt_is_read(rq) && !rwb->sync_issue) { rwb->sync_cookie = rq; rwb->sync_issue = rq->io_start_time_ns; } } static void wbt_requeue(struct rq_qos *rqos, struct request *rq) { struct rq_wb *rwb = RQWB(rqos); if (!rwb_enabled(rwb)) return; if (rq == rwb->sync_cookie) { rwb->sync_issue = 0; rwb->sync_cookie = NULL; } } /* * Enable wbt if defaults are configured that way */ void wbt_enable_default(struct gendisk *disk) { struct request_queue *q = disk->queue; struct rq_qos *rqos; bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ); if (q->elevator && test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags)) enable = false; /* Throttling already enabled? */ rqos = wbt_rq_qos(q); if (rqos) { if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; return; } /* Queue not registered? Maybe shutting down... */ if (!blk_queue_registered(q)) return; if (queue_is_mq(q) && enable) wbt_init(disk); } EXPORT_SYMBOL_GPL(wbt_enable_default); u64 wbt_default_latency_nsec(struct request_queue *q) { /* * We default to 2msec for non-rotational storage, and 75msec * for rotational storage. 
*/ if (blk_queue_nonrot(q)) return 2000000ULL; else return 75000000ULL; } static int wbt_data_dir(const struct request *rq) { const enum req_op op = req_op(rq); if (op == REQ_OP_READ) return READ; else if (op_is_write(op)) return WRITE; /* don't account */ return -1; } static void wbt_queue_depth_changed(struct rq_qos *rqos) { RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); wbt_update_limits(RQWB(rqos)); } static void wbt_exit(struct rq_qos *rqos) { struct rq_wb *rwb = RQWB(rqos); blk_stat_remove_callback(rqos->disk->queue, rwb->cb); blk_stat_free_callback(rwb->cb); kfree(rwb); } /* * Disable wbt, if enabled by default. */ void wbt_disable_default(struct gendisk *disk) { struct rq_qos *rqos = wbt_rq_qos(disk->queue); struct rq_wb *rwb; if (!rqos) return; rwb = RQWB(rqos); if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { blk_stat_deactivate(rwb->cb); rwb->enable_state = WBT_STATE_OFF_DEFAULT; } } EXPORT_SYMBOL_GPL(wbt_disable_default); #ifdef CONFIG_BLK_DEBUG_FS static int wbt_curr_win_nsec_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%llu\n", rwb->cur_win_nsec); return 0; } static int wbt_enabled_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%d\n", rwb->enable_state); return 0; } static int wbt_id_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; seq_printf(m, "%u\n", rqos->id); return 0; } static int wbt_inflight_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); int i; for (i = 0; i < WBT_NUM_RWQ; i++) seq_printf(m, "%d: inflight %d\n", i, atomic_read(&rwb->rq_wait[i].inflight)); return 0; } static int wbt_min_lat_nsec_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%lu\n", rwb->min_lat_nsec); return 0; } static int wbt_unknown_cnt_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%u\n", rwb->unknown_cnt); return 0; } static int wbt_normal_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%u\n", rwb->wb_normal); return 0; } static int wbt_background_show(void *data, struct seq_file *m) { struct rq_qos *rqos = data; struct rq_wb *rwb = RQWB(rqos); seq_printf(m, "%u\n", rwb->wb_background); return 0; } static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = { {"curr_win_nsec", 0400, wbt_curr_win_nsec_show}, {"enabled", 0400, wbt_enabled_show}, {"id", 0400, wbt_id_show}, {"inflight", 0400, wbt_inflight_show}, {"min_lat_nsec", 0400, wbt_min_lat_nsec_show}, {"unknown_cnt", 0400, wbt_unknown_cnt_show}, {"wb_normal", 0400, wbt_normal_show}, {"wb_background", 0400, wbt_background_show}, {}, }; #endif static const struct rq_qos_ops wbt_rqos_ops = { .throttle = wbt_wait, .issue = wbt_issue, .track = wbt_track, .requeue = wbt_requeue, .done = wbt_done, .cleanup = wbt_cleanup, .queue_depth_changed = wbt_queue_depth_changed, .exit = wbt_exit, #ifdef CONFIG_BLK_DEBUG_FS .debugfs_attrs = wbt_debugfs_attrs, #endif }; int wbt_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct rq_wb *rwb; int i; int ret; rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) return -ENOMEM; rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); if (!rwb->cb) { kfree(rwb); return -ENOMEM; } for (i = 0; i < WBT_NUM_RWQ; i++) rq_wait_init(&rwb->rq_wait[i]); rwb->last_comp = 
rwb->last_issue = jiffies; rwb->win_nsec = RWB_WINDOW_NSEC; rwb->enable_state = WBT_STATE_ON_DEFAULT; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; rwb->min_lat_nsec = wbt_default_latency_nsec(q); rwb->rq_depth.queue_depth = blk_queue_depth(q); wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. */ mutex_lock(&q->rq_qos_mutex); ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); mutex_unlock(&q->rq_qos_mutex); if (ret) goto err_free; blk_stat_add_callback(q, rwb->cb); return 0; err_free: blk_stat_free_callback(rwb->cb); kfree(rwb); return ret; } |
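/*
 * Illustrative user-space sketch (not kernel code): it shows the scaling
 * arithmetic described in the header comment above. The monitoring window
 * uses the same fixed-point trick as rwb_arm_timer(), i.e. (win << 4)
 * divided by an integer sqrt of ((step + 1) << 8), which approximates
 * win / sqrt(step + 1). The depth halving per step is an assumption taken
 * from the header comment ("scale down queue depth by a factor of 2x");
 * the real depth math lives in rq_depth_calc_max_depth() in blk-rq-qos.c,
 * which is not part of this file. wb_normal/wb_background mirror
 * calc_wb_limits() above; isqrt() and max_depth are stand-ins.
 */
#include <stdio.h>
#include <stdint.h>
#include <math.h>

/* Integer square root, standing in for the kernel's int_sqrt(). */
static uint64_t isqrt(uint64_t x)
{
	uint64_t r = (uint64_t)sqrt((double)x);

	while (r * r > x)
		r--;
	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	const uint64_t win_nsec = 100ULL * 1000 * 1000;	/* 100 msec default window */
	unsigned int max_depth = 64;			/* assumed device queue depth */

	for (int step = 0; step <= 4; step++) {
		uint64_t win = step > 0 ?
			(win_nsec << 4) / isqrt(((uint64_t)(step + 1)) << 8) :
			win_nsec;
		unsigned int depth = max_depth >> step;	/* assumed 2x per step */
		unsigned int wb_normal = (depth + 1) / 2;
		unsigned int wb_background = (depth + 3) / 4;

		printf("step %d: window %llu ns, depth %u, normal %u, background %u\n",
		       step, (unsigned long long)win, depth, wb_normal, wb_background);
	}
	return 0;
}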
44 44 44 8 8 8 44 44 44 44 44 44 8 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de> #include <linux/spinlock.h> #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/irq.h> struct cpumap { unsigned int available; unsigned int allocated; unsigned int managed; unsigned int managed_allocated; bool initialized; bool online; unsigned long *managed_map; unsigned long alloc_map[]; }; struct irq_matrix { unsigned int matrix_bits; unsigned int alloc_start; unsigned int alloc_end; unsigned int alloc_size; unsigned int global_available; unsigned int global_reserved; unsigned int systembits_inalloc; unsigned int total_allocated; unsigned int online_maps; struct cpumap __percpu *maps; unsigned long *system_map; unsigned long scratch_map[]; }; #define CREATE_TRACE_POINTS #include <trace/events/irq_matrix.h> /** * irq_alloc_matrix - Allocate a irq_matrix structure and initialize it * @matrix_bits: Number of matrix bits must be <= IRQ_MATRIX_BITS * @alloc_start: From which bit the allocation search starts * @alloc_end: At which bit the allocation search ends, i.e first * invalid bit */ __init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, unsigned int alloc_start, unsigned int alloc_end) { unsigned int cpu, matrix_size = BITS_TO_LONGS(matrix_bits); struct irq_matrix *m; m = kzalloc(struct_size(m, scratch_map, matrix_size * 2), GFP_KERNEL); if (!m) return NULL; m->system_map = &m->scratch_map[matrix_size]; m->matrix_bits = matrix_bits; 
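	/*
	 * Note: scratch_map and system_map share the single allocation made
	 * via struct_size() above; system_map simply points at its second
	 * half. The per-CPU alloc_map/managed_map pairs set up below are
	 * laid out the same way.
	 */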
m->alloc_start = alloc_start; m->alloc_end = alloc_end; m->alloc_size = alloc_end - alloc_start; m->maps = __alloc_percpu(struct_size(m->maps, alloc_map, matrix_size * 2), __alignof__(*m->maps)); if (!m->maps) { kfree(m); return NULL; } for_each_possible_cpu(cpu) { struct cpumap *cm = per_cpu_ptr(m->maps, cpu); cm->managed_map = &cm->alloc_map[matrix_size]; } return m; } /** * irq_matrix_online - Bring the local CPU matrix online * @m: Matrix pointer */ void irq_matrix_online(struct irq_matrix *m) { struct cpumap *cm = this_cpu_ptr(m->maps); BUG_ON(cm->online); if (!cm->initialized) { cm->available = m->alloc_size; cm->available -= cm->managed + m->systembits_inalloc; cm->initialized = true; } m->global_available += cm->available; cm->online = true; m->online_maps++; trace_irq_matrix_online(m); } /** * irq_matrix_offline - Bring the local CPU matrix offline * @m: Matrix pointer */ void irq_matrix_offline(struct irq_matrix *m) { struct cpumap *cm = this_cpu_ptr(m->maps); /* Update the global available size */ m->global_available -= cm->available; cm->online = false; m->online_maps--; trace_irq_matrix_offline(m); } static unsigned int matrix_alloc_area(struct irq_matrix *m, struct cpumap *cm, unsigned int num, bool managed) { unsigned int area, start = m->alloc_start; unsigned int end = m->alloc_end; bitmap_or(m->scratch_map, cm->managed_map, m->system_map, end); bitmap_or(m->scratch_map, m->scratch_map, cm->alloc_map, end); area = bitmap_find_next_zero_area(m->scratch_map, end, start, num, 0); if (area >= end) return area; if (managed) bitmap_set(cm->managed_map, area, num); else bitmap_set(cm->alloc_map, area, num); return area; } /* Find the best CPU which has the lowest vector allocation count */ static unsigned int matrix_find_best_cpu(struct irq_matrix *m, const struct cpumask *msk) { unsigned int cpu, best_cpu, maxavl = 0; struct cpumap *cm; best_cpu = UINT_MAX; for_each_cpu(cpu, msk) { cm = per_cpu_ptr(m->maps, cpu); if (!cm->online || cm->available <= maxavl) continue; best_cpu = cpu; maxavl = cm->available; } return best_cpu; } /* Find the best CPU which has the lowest number of managed IRQs allocated */ static unsigned int matrix_find_best_cpu_managed(struct irq_matrix *m, const struct cpumask *msk) { unsigned int cpu, best_cpu, allocated = UINT_MAX; struct cpumap *cm; best_cpu = UINT_MAX; for_each_cpu(cpu, msk) { cm = per_cpu_ptr(m->maps, cpu); if (!cm->online || cm->managed_allocated > allocated) continue; best_cpu = cpu; allocated = cm->managed_allocated; } return best_cpu; } /** * irq_matrix_assign_system - Assign system wide entry in the matrix * @m: Matrix pointer * @bit: Which bit to reserve * @replace: Replace an already allocated vector with a system * vector at the same bit position. * * The BUG_ON()s below are on purpose. If this goes wrong in the * early boot process, then the chance to survive is about zero. * If this happens when the system is life, it's not much better. 
*/ void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace) { struct cpumap *cm = this_cpu_ptr(m->maps); BUG_ON(bit > m->matrix_bits); BUG_ON(m->online_maps > 1 || (m->online_maps && !replace)); set_bit(bit, m->system_map); if (replace) { BUG_ON(!test_and_clear_bit(bit, cm->alloc_map)); cm->allocated--; m->total_allocated--; } if (bit >= m->alloc_start && bit < m->alloc_end) m->systembits_inalloc++; trace_irq_matrix_assign_system(bit, m); } /** * irq_matrix_reserve_managed - Reserve a managed interrupt in a CPU map * @m: Matrix pointer * @msk: On which CPUs the bits should be reserved. * * Can be called for offline CPUs. Note, this will only reserve one bit * on all CPUs in @msk, but it's not guaranteed that the bits are at the * same offset on all CPUs */ int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk) { unsigned int cpu, failed_cpu; for_each_cpu(cpu, msk) { struct cpumap *cm = per_cpu_ptr(m->maps, cpu); unsigned int bit; bit = matrix_alloc_area(m, cm, 1, true); if (bit >= m->alloc_end) goto cleanup; cm->managed++; if (cm->online) { cm->available--; m->global_available--; } trace_irq_matrix_reserve_managed(bit, cpu, m, cm); } return 0; cleanup: failed_cpu = cpu; for_each_cpu(cpu, msk) { if (cpu == failed_cpu) break; irq_matrix_remove_managed(m, cpumask_of(cpu)); } return -ENOSPC; } /** * irq_matrix_remove_managed - Remove managed interrupts in a CPU map * @m: Matrix pointer * @msk: On which CPUs the bits should be removed * * Can be called for offline CPUs * * This removes not allocated managed interrupts from the map. It does * not matter which one because the managed interrupts free their * allocation when they shut down. If not, the accounting is screwed, * but all what can be done at this point is warn about it. 
*/ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk) { unsigned int cpu; for_each_cpu(cpu, msk) { struct cpumap *cm = per_cpu_ptr(m->maps, cpu); unsigned int bit, end = m->alloc_end; if (WARN_ON_ONCE(!cm->managed)) continue; /* Get managed bit which are not allocated */ bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); bit = find_first_bit(m->scratch_map, end); if (WARN_ON_ONCE(bit >= end)) continue; clear_bit(bit, cm->managed_map); cm->managed--; if (cm->online) { cm->available++; m->global_available++; } trace_irq_matrix_remove_managed(bit, cpu, m, cm); } } /** * irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map * @m: Matrix pointer * @msk: Which CPUs to search in * @mapped_cpu: Pointer to store the CPU for which the irq was allocated */ int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, unsigned int *mapped_cpu) { unsigned int bit, cpu, end; struct cpumap *cm; if (cpumask_empty(msk)) return -EINVAL; cpu = matrix_find_best_cpu_managed(m, msk); if (cpu == UINT_MAX) return -ENOSPC; cm = per_cpu_ptr(m->maps, cpu); end = m->alloc_end; /* Get managed bit which are not allocated */ bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); bit = find_first_bit(m->scratch_map, end); if (bit >= end) return -ENOSPC; set_bit(bit, cm->alloc_map); cm->allocated++; cm->managed_allocated++; m->total_allocated++; *mapped_cpu = cpu; trace_irq_matrix_alloc_managed(bit, cpu, m, cm); return bit; } /** * irq_matrix_assign - Assign a preallocated interrupt in the local CPU map * @m: Matrix pointer * @bit: Which bit to mark * * This should only be used to mark preallocated vectors */ void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) { struct cpumap *cm = this_cpu_ptr(m->maps); if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; if (WARN_ON_ONCE(test_and_set_bit(bit, cm->alloc_map))) return; cm->allocated++; m->total_allocated++; cm->available--; m->global_available--; trace_irq_matrix_assign(bit, smp_processor_id(), m, cm); } /** * irq_matrix_reserve - Reserve interrupts * @m: Matrix pointer * * This is merely a book keeping call. It increments the number of globally * reserved interrupt bits w/o actually allocating them. This allows to * setup interrupt descriptors w/o assigning low level resources to it. * The actual allocation happens when the interrupt gets activated. */ void irq_matrix_reserve(struct irq_matrix *m) { if (m->global_reserved == m->global_available) pr_warn("Interrupt reservation exceeds available resources\n"); m->global_reserved++; trace_irq_matrix_reserve(m); } /** * irq_matrix_remove_reserved - Remove interrupt reservation * @m: Matrix pointer * * This is merely a book keeping call. It decrements the number of globally * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the * interrupt was never in use and a real vector allocated, which undid the * reservation. 
*/ void irq_matrix_remove_reserved(struct irq_matrix *m) { m->global_reserved--; trace_irq_matrix_remove_reserved(m); } /** * irq_matrix_alloc - Allocate a regular interrupt in a CPU map * @m: Matrix pointer * @msk: Which CPUs to search in * @reserved: Allocate previously reserved interrupts * @mapped_cpu: Pointer to store the CPU for which the irq was allocated */ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, bool reserved, unsigned int *mapped_cpu) { unsigned int cpu, bit; struct cpumap *cm; /* * Not required in theory, but matrix_find_best_cpu() uses * for_each_cpu() which ignores the cpumask on UP . */ if (cpumask_empty(msk)) return -EINVAL; cpu = matrix_find_best_cpu(m, msk); if (cpu == UINT_MAX) return -ENOSPC; cm = per_cpu_ptr(m->maps, cpu); bit = matrix_alloc_area(m, cm, 1, false); if (bit >= m->alloc_end) return -ENOSPC; cm->allocated++; cm->available--; m->total_allocated++; m->global_available--; if (reserved) m->global_reserved--; *mapped_cpu = cpu; trace_irq_matrix_alloc(bit, cpu, m, cm); return bit; } /** * irq_matrix_free - Free allocated interrupt in the matrix * @m: Matrix pointer * @cpu: Which CPU map needs be updated * @bit: The bit to remove * @managed: If true, the interrupt is managed and not accounted * as available. */ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, unsigned int bit, bool managed) { struct cpumap *cm = per_cpu_ptr(m->maps, cpu); if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) return; cm->allocated--; if(managed) cm->managed_allocated--; if (cm->online) m->total_allocated--; if (!managed) { cm->available++; if (cm->online) m->global_available++; } trace_irq_matrix_free(bit, cpu, m, cm); } /** * irq_matrix_available - Get the number of globally available irqs * @m: Pointer to the matrix to query * @cpudown: If true, the local CPU is about to go down, adjust * the number of available irqs accordingly */ unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown) { struct cpumap *cm = this_cpu_ptr(m->maps); if (!cpudown) return m->global_available; return m->global_available - cm->available; } /** * irq_matrix_reserved - Get the number of globally reserved irqs * @m: Pointer to the matrix to query */ unsigned int irq_matrix_reserved(struct irq_matrix *m) { return m->global_reserved; } /** * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU * @m: Pointer to the matrix to search * * This returns number of allocated non-managed interrupts. */ unsigned int irq_matrix_allocated(struct irq_matrix *m) { struct cpumap *cm = this_cpu_ptr(m->maps); return cm->allocated - cm->managed_allocated; } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS /** * irq_matrix_debug_show - Show detailed allocation information * @sf: Pointer to the seq_file to print to * @m: Pointer to the matrix allocator * @ind: Indentation for the print format * * Note, this is a lockless snapshot. 
*/ void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind) { unsigned int nsys = bitmap_weight(m->system_map, m->matrix_bits); int cpu; seq_printf(sf, "Online bitmaps: %6u\n", m->online_maps); seq_printf(sf, "Global available: %6u\n", m->global_available); seq_printf(sf, "Global reserved: %6u\n", m->global_reserved); seq_printf(sf, "Total allocated: %6u\n", m->total_allocated); seq_printf(sf, "System: %u: %*pbl\n", nsys, m->matrix_bits, m->system_map); seq_printf(sf, "%*s| CPU | avl | man | mac | act | vectors\n", ind, " "); cpus_read_lock(); for_each_online_cpu(cpu) { struct cpumap *cm = per_cpu_ptr(m->maps, cpu); seq_printf(sf, "%*s %4d %4u %4u %4u %4u %*pbl\n", ind, " ", cpu, cm->available, cm->managed, cm->managed_allocated, cm->allocated, m->matrix_bits, cm->alloc_map); } cpus_read_unlock(); } #endif |
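/*
 * Illustrative user-space sketch (not kernel code): the core of
 * matrix_alloc_area() above is "OR the system, managed and allocated maps
 * into a scratch map, then find a free area inside [alloc_start, alloc_end)".
 * This standalone version uses a single 64-bit word per map (so it assumes
 * matrix_bits <= 64) instead of the kernel bitmap helpers; find_zero_area()
 * is a simplified stand-in for bitmap_find_next_zero_area().
 */
#include <stdio.h>
#include <stdint.h>

struct mini_matrix {
	unsigned int alloc_start, alloc_end;	/* search range, end exclusive */
	uint64_t system_map;			/* system-wide vectors */
	uint64_t managed_map;			/* managed (reserved) vectors */
	uint64_t alloc_map;			/* allocated vectors */
};

/* Find 'num' consecutive zero bits in 'map' within [start, end). */
static int find_zero_area(uint64_t map, unsigned int start, unsigned int end,
			  unsigned int num)
{
	for (unsigned int bit = start; bit + num <= end; bit++) {
		uint64_t mask = ((num < 64 ? (1ULL << num) : 0ULL) - 1) << bit;

		if (!(map & mask))
			return (int)bit;
	}
	return -1;	/* no space, analogous to "area >= end" above */
}

static int mini_alloc(struct mini_matrix *m)
{
	uint64_t scratch = m->system_map | m->managed_map | m->alloc_map;
	int bit = find_zero_area(scratch, m->alloc_start, m->alloc_end, 1);

	if (bit >= 0)
		m->alloc_map |= 1ULL << bit;
	return bit;
}

int main(void)
{
	struct mini_matrix m = {
		.alloc_start = 32,
		.alloc_end = 48,
		.system_map = 1ULL << 32,	/* pretend bit 32 is a system vector */
	};

	for (int i = 0; i < 3; i++)
		printf("allocated bit %d\n", mini_alloc(&m));
	return 0;
}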
1 2 1 1 1 1 1 1 1 1 1 4 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | /* * llc_station.c - station component of LLC * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <net/llc.h> #include <net/llc_sap.h> #include <net/llc_conn.h> #include <net/llc_c_ac.h> #include <net/llc_s_ac.h> #include <net/llc_c_ev.h> #include <net/llc_c_st.h> #include <net/llc_s_ev.h> #include <net/llc_s_st.h> #include <net/llc_pdu.h> static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && !pdu->dsap; /* NULL DSAP value */ } static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && !pdu->dsap; /* NULL DSAP */ } static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, sizeof(struct llc_xid_info)); if (!nskb) goto out; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127); rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; dev_queue_xmit(nskb); out: return rc; free: kfree_skb(nskb); goto out; } static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; u32 data_size; struct sk_buff *nskb; if (skb->mac_len < ETH_HLEN) goto out; /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; dev_queue_xmit(nskb); out: return rc; free: kfree_skb(nskb); goto out; } /** * llc_station_rcv - send received pdu to the station state machine * @skb: received frame. * * Sends data unit to station state machine. 
*/ static void llc_station_rcv(struct sk_buff *skb) { if (llc_stat_ev_rx_null_dsap_xid_c(skb)) llc_station_ac_send_xid_r(skb); else if (llc_stat_ev_rx_null_dsap_test_c(skb)) llc_station_ac_send_test_r(skb); kfree_skb(skb); } void __init llc_station_init(void) { llc_set_station_handler(llc_station_rcv); } void llc_station_exit(void) { llc_set_station_handler(NULL); } |
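/*
 * Illustrative user-space sketch (not kernel code): the two station events
 * above boil down to a few bit tests on the 3-byte 802.2 LLC header. This
 * standalone classifier mirrors LLC_PDU_IS_CMD() (the C/R bit is the low
 * bit of the SSAP), LLC_PDU_TYPE_IS_U() (low two control bits == 3) and
 * LLC_U_PDU_CMD() (P/F bit 0x10 masked off). The XID/TEST values
 * (0xAF/0xE3) are written out by hand from the 802.2 encoding rather than
 * taken from <net/llc_pdu.h>, so treat them as assumptions.
 */
#include <stdio.h>
#include <stdint.h>

struct llc_hdr {
	uint8_t dsap;
	uint8_t ssap;
	uint8_t ctrl;
};

enum station_ev { EV_NONE, EV_NULL_DSAP_XID_CMD, EV_NULL_DSAP_TEST_CMD };

static enum station_ev classify(const struct llc_hdr *h)
{
	uint8_t cmd = h->ctrl & (uint8_t)~0x10;		/* mask the P/F bit */

	if (h->ssap & 0x01)				/* response, not command */
		return EV_NONE;
	if ((h->ctrl & 0x03) != 0x03)			/* not a U-format PDU */
		return EV_NONE;
	if (h->dsap != 0x00)				/* not the NULL DSAP */
		return EV_NONE;
	if (cmd == 0xAF)
		return EV_NULL_DSAP_XID_CMD;
	if (cmd == 0xE3)
		return EV_NULL_DSAP_TEST_CMD;
	return EV_NONE;
}

int main(void)
{
	struct llc_hdr xid  = { .dsap = 0x00, .ssap = 0x42, .ctrl = 0xBF };	/* XID, P=1 */
	struct llc_hdr test = { .dsap = 0x00, .ssap = 0x42, .ctrl = 0xE3 };	/* TEST, P=0 */

	printf("xid -> %d, test -> %d\n", classify(&xid), classify(&test));
	return 0;
}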
2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 | // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * */ #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/nls.h> #include "debug.h" #include "ntfs.h" #include "ntfs_fs.h" // clang-format off const struct cpu_str NAME_MFT = { 4, 0, { '$', 'M', 'F', 'T' }, }; const struct cpu_str NAME_MIRROR = { 8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' }, }; const struct cpu_str NAME_LOGFILE = { 8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' }, }; const struct cpu_str NAME_VOLUME = { 7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' }, }; const struct cpu_str NAME_ATTRDEF = { 8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' }, }; const struct cpu_str NAME_ROOT = { 1, 0, { '.' 
}, }; const struct cpu_str NAME_BITMAP = { 7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' }, }; const struct cpu_str NAME_BOOT = { 5, 0, { '$', 'B', 'o', 'o', 't' }, }; const struct cpu_str NAME_BADCLUS = { 8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' }, }; const struct cpu_str NAME_QUOTA = { 6, 0, { '$', 'Q', 'u', 'o', 't', 'a' }, }; const struct cpu_str NAME_SECURE = { 7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' }, }; const struct cpu_str NAME_UPCASE = { 7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' }, }; const struct cpu_str NAME_EXTEND = { 7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' }, }; const struct cpu_str NAME_OBJID = { 6, 0, { '$', 'O', 'b', 'j', 'I', 'd' }, }; const struct cpu_str NAME_REPARSE = { 8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' }, }; const struct cpu_str NAME_USNJRNL = { 8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' }, }; const __le16 BAD_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'), }; const __le16 I30_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'), }; const __le16 SII_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'), }; const __le16 SDH_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'), }; const __le16 SDS_NAME[4] = { cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'), }; const __le16 SO_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('O'), }; const __le16 SQ_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('Q'), }; const __le16 SR_NAME[2] = { cpu_to_le16('$'), cpu_to_le16('R'), }; #ifdef CONFIG_NTFS3_LZX_XPRESS const __le16 WOF_NAME[17] = { cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'), cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'), cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'), cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'), cpu_to_le16('a'), }; #endif static const __le16 CON_NAME[3] = { cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('N'), }; static const __le16 NUL_NAME[3] = { cpu_to_le16('N'), cpu_to_le16('U'), cpu_to_le16('L'), }; static const __le16 AUX_NAME[3] = { cpu_to_le16('A'), cpu_to_le16('U'), cpu_to_le16('X'), }; static const __le16 PRN_NAME[3] = { cpu_to_le16('P'), cpu_to_le16('R'), cpu_to_le16('N'), }; static const __le16 COM_NAME[3] = { cpu_to_le16('C'), cpu_to_le16('O'), cpu_to_le16('M'), }; static const __le16 LPT_NAME[3] = { cpu_to_le16('L'), cpu_to_le16('P'), cpu_to_le16('T'), }; // clang-format on /* * ntfs_fix_pre_write - Insert fixups into @rhdr before writing to disk. */ bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes) { u16 *fixup, *ptr; u16 sample; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return false; } /* Get fixup pointer. */ fixup = Add2Ptr(rhdr, fo); if (*fixup >= 0x7FFF) *fixup = 1; else *fixup += 1; sample = *fixup; ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); while (fn--) { *++fixup = *ptr; *ptr = sample; ptr += SECTOR_SIZE / sizeof(short); } return true; } /* * ntfs_fix_post_read - Remove fixups after reading from disk. * * Return: < 0 if error, 0 if ok, 1 if need to update fixups. */ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple) { int ret; u16 *fixup, *ptr; u16 sample, fo, fn; fo = le16_to_cpu(rhdr->fix_off); fn = simple ? ((bytes >> SECTOR_SHIFT) + 1) : le16_to_cpu(rhdr->fix_num); /* Check errors. 
*/ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return -E_NTFS_CORRUPT; } /* Get fixup pointer. */ fixup = Add2Ptr(rhdr, fo); sample = *fixup; ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); ret = 0; while (fn--) { /* Test current word. */ if (*ptr != sample) { /* Fixup does not match! Is it serious error? */ ret = -E_NTFS_FIXUP; } /* Replace fixup. */ *ptr = *++fixup; ptr += SECTOR_SIZE / sizeof(short); } return ret; } /* * ntfs_extend_init - Load $Extend file. */ int ntfs_extend_init(struct ntfs_sb_info *sbi) { int err; struct super_block *sb = sbi->sb; struct inode *inode, *inode2; struct MFT_REF ref; if (sbi->volume.major_ver < 3) { ntfs_notice(sb, "Skip $Extend 'cause NTFS version"); return 0; } ref.low = cpu_to_le32(MFT_REC_EXTEND); ref.high = 0; ref.seq = cpu_to_le16(MFT_REC_EXTEND); inode = ntfs_iget5(sb, &ref, &NAME_EXTEND); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Extend (%d).", err); inode = NULL; goto out; } /* If ntfs_iget5() reads from disk it never returns bad inode. */ if (!S_ISDIR(inode->i_mode)) { err = -EINVAL; goto out; } /* Try to find $ObjId */ inode2 = dir_search_u(inode, &NAME_OBJID, NULL); if (inode2 && !IS_ERR(inode2)) { if (is_bad_inode(inode2)) { iput(inode2); } else { sbi->objid.ni = ntfs_i(inode2); sbi->objid_no = inode2->i_ino; } } /* Try to find $Quota */ inode2 = dir_search_u(inode, &NAME_QUOTA, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->quota_no = inode2->i_ino; iput(inode2); } /* Try to find $Reparse */ inode2 = dir_search_u(inode, &NAME_REPARSE, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->reparse.ni = ntfs_i(inode2); sbi->reparse_no = inode2->i_ino; } /* Try to find $UsnJrnl */ inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL); if (inode2 && !IS_ERR(inode2)) { sbi->usn_jrnl_no = inode2->i_ino; iput(inode2); } err = 0; out: iput(inode); return err; } int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi) { int err = 0; struct super_block *sb = sbi->sb; bool initialized = false; struct MFT_REF ref; struct inode *inode; /* Check for 4GB. */ if (ni->vfs_inode.i_size >= 0x100000000ull) { ntfs_err(sb, "\x24LogFile is large than 4G."); err = -EINVAL; goto out; } sbi->flags |= NTFS_FLAGS_LOG_REPLAYING; ref.low = cpu_to_le32(MFT_REC_MFT); ref.high = 0; ref.seq = cpu_to_le16(1); inode = ntfs_iget5(sb, &ref, NULL); if (IS_ERR(inode)) inode = NULL; if (!inode) { /* Try to use MFT copy. */ u64 t64 = sbi->mft.lbo; sbi->mft.lbo = sbi->mft.lbo2; inode = ntfs_iget5(sb, &ref, NULL); sbi->mft.lbo = t64; if (IS_ERR(inode)) inode = NULL; } if (!inode) { err = -EINVAL; ntfs_err(sb, "Failed to load $MFT."); goto out; } sbi->mft.ni = ntfs_i(inode); /* LogFile should not contains attribute list. */ err = ni_load_all_mi(sbi->mft.ni); if (!err) err = log_replay(ni, &initialized); iput(inode); sbi->mft.ni = NULL; sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) { err = 0; goto out; } if (sb_rdonly(sb) || !initialized) goto out; /* Fill LogFile by '-1' if it is initialized. */ err = ntfs_bio_fill_1(sbi, &ni->file.run); out: sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING; return err; } /* * ntfs_look_for_free_space - Look for a free space in bitmap. 
*/ int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, CLST *new_lcn, CLST *new_len, enum ALLOCATE_OPT opt) { int err; CLST alen; struct super_block *sb = sbi->sb; size_t alcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (opt & ALLOCATE_MFT) { zlen = wnd_zone_len(wnd); if (!zlen) { err = ntfs_refresh_zone(sbi); if (err) goto up_write; zlen = wnd_zone_len(wnd); } if (!zlen) { ntfs_err(sbi->sb, "no free space to extend mft"); err = -ENOSPC; goto up_write; } lcn = wnd_zone_bit(wnd); alen = min_t(CLST, len, zlen); wnd_zone_set(wnd, lcn + alen, zlen - alen); err = wnd_set_used(wnd, lcn, alen); if (err) goto up_write; alcn = lcn; goto space_found; } /* * 'Cause cluster 0 is always used this value means that we should use * cached value of 'next_free_lcn' to improve performance. */ if (!lcn) lcn = sbi->used.next_free_lcn; if (lcn >= wnd->nbits) lcn = 0; alen = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &alcn); if (alen) goto space_found; /* Try to use clusters from MftZone. */ zlen = wnd_zone_len(wnd); zeroes = wnd_zeroes(wnd); /* Check too big request */ if (len > zeroes + zlen || zlen <= NTFS_MIN_MFT_ZONE) { err = -ENOSPC; goto up_write; } /* How many clusters to cat from zone. */ zlcn = wnd_zone_bit(wnd); zlen2 = zlen >> 1; ztrim = clamp_val(len, zlen2, zlen); new_zlen = max_t(size_t, zlen - ztrim, NTFS_MIN_MFT_ZONE); wnd_zone_set(wnd, zlcn, new_zlen); /* Allocate continues clusters. */ alen = wnd_find(wnd, len, 0, BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &alcn); if (!alen) { err = -ENOSPC; goto up_write; } space_found: err = 0; *new_len = alen; *new_lcn = alcn; ntfs_unmap_meta(sb, alcn, alen); /* Set hint for next requests. */ if (!(opt & ALLOCATE_MFT)) sbi->used.next_free_lcn = alcn + alen; up_write: up_write(&wnd->rw_lock); return err; } /* * ntfs_check_for_free_space * * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records */ bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) { size_t free, zlen, avail; struct wnd_bitmap *wnd; wnd = &sbi->used.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); free = wnd_zeroes(wnd); zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); up_read(&wnd->rw_lock); if (free < zlen + clen) return false; avail = free - (zlen + clen); wnd = &sbi->mft.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); free = wnd_zeroes(wnd); zlen = wnd_zone_len(wnd); up_read(&wnd->rw_lock); if (free >= zlen + mlen) return true; return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); } /* * ntfs_extend_mft - Allocate additional MFT records. * * sbi->mft.bitmap is locked for write. * * NOTE: recursive: * ntfs_look_free_mft -> * ntfs_extend_mft -> * attr_set_size -> * ni_insert_nonresident -> * ni_insert_attr -> * ni_ins_attr_ext -> * ntfs_look_free_mft -> * ntfs_extend_mft * * To avoid recursive always allocate space for two new MFT records * see attrib.c: "at least two MFT to avoid recursive loop". */ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->mft.ni; size_t new_mft_total; u64 new_mft_bytes, new_bitmap_bytes; struct ATTRIB *attr; struct wnd_bitmap *wnd = &sbi->mft.bitmap; new_mft_total = ALIGN(wnd->nbits + NTFS_MFT_INCREASE_STEP, 128); new_mft_bytes = (u64)new_mft_total << sbi->record_bits; /* Step 1: Resize $MFT::DATA. 
*/ down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_mft_bytes, NULL, false, &attr); if (err) { up_write(&ni->file.run_lock); goto out; } attr->nres.valid_size = attr->nres.data_size; new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits; ni->mi.dirty = true; /* Step 2: Resize $MFT::BITMAP. */ new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total); err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, new_bitmap_bytes, &new_bitmap_bytes, true, NULL); /* Refresh MFT Zone if necessary. */ down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); ntfs_refresh_zone(sbi); up_write(&sbi->used.bitmap.rw_lock); up_write(&ni->file.run_lock); if (err) goto out; err = wnd_extend(wnd, new_mft_total); if (err) goto out; ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total); err = _ni_write_inode(&ni->vfs_inode, 0); out: return err; } /* * ntfs_look_free_mft - Look for a free MFT record. */ int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi) { int err = 0; size_t zbit, zlen, from, to, fr; size_t mft_total; struct MFT_REF ref; struct super_block *sb = sbi->sb; struct wnd_bitmap *wnd = &sbi->mft.bitmap; u32 ir; static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >= MFT_REC_FREE - MFT_REC_RESERVED); if (!mft) down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); zlen = wnd_zone_len(wnd); /* Always reserve space for MFT. */ if (zlen) { if (mft) { zbit = wnd_zone_bit(wnd); *rno = zbit; wnd_zone_set(wnd, zbit + 1, zlen - 1); } goto found; } /* No MFT zone. Find the nearest to '0' free MFT. */ if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) { /* Resize MFT */ mft_total = wnd->nbits; err = ntfs_extend_mft(sbi); if (!err) { zbit = mft_total; goto reserve_mft; } if (!mft || MFT_REC_FREE == sbi->mft.next_reserved) goto out; err = 0; /* * Look for free record reserved area [11-16) == * [MFT_REC_RESERVED, MFT_REC_FREE ) MFT bitmap always * marks it as used. */ if (!sbi->mft.reserved_bitmap) { /* Once per session create internal bitmap for 5 bits. */ sbi->mft.reserved_bitmap = 0xFF; ref.high = 0; for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) { struct inode *i; struct ntfs_inode *ni; struct MFT_REC *mrec; ref.low = cpu_to_le32(ir); ref.seq = cpu_to_le16(ir); i = ntfs_iget5(sb, &ref, NULL); if (IS_ERR(i)) { next: ntfs_notice( sb, "Invalid reserved record %x", ref.low); continue; } if (is_bad_inode(i)) { iput(i); goto next; } ni = ntfs_i(i); mrec = ni->mi.mrec; if (!is_rec_base(mrec)) goto next; if (mrec->hard_links) goto next; if (!ni_std(ni)) goto next; if (ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL)) goto next; __clear_bit(ir - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } } /* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */ zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap, MFT_REC_FREE, MFT_REC_RESERVED); if (zbit >= MFT_REC_FREE) { sbi->mft.next_reserved = MFT_REC_FREE; goto out; } zlen = 1; sbi->mft.next_reserved = zbit; } else { reserve_mft: zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4; if (zbit + zlen > wnd->nbits) zlen = wnd->nbits - zbit; while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen)) zlen -= 1; /* [zbit, zbit + zlen) will be used for MFT itself. 
*/ from = sbi->mft.used; if (from < zbit) from = zbit; to = zbit + zlen; if (from < to) { ntfs_clear_mft_tail(sbi, from, to); sbi->mft.used = to; } } if (mft) { *rno = zbit; zbit += 1; zlen -= 1; } wnd_zone_set(wnd, zbit, zlen); found: if (!mft) { /* The request to get record for general purpose. */ if (sbi->mft.next_free < MFT_REC_USER) sbi->mft.next_free = MFT_REC_USER; for (;;) { if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) { } else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) { sbi->mft.next_free = sbi->mft.bitmap.nbits; } else { *rno = fr; sbi->mft.next_free = *rno + 1; break; } err = ntfs_extend_mft(sbi); if (err) goto out; } } if (ni && !ni_add_subrecord(ni, *rno, mi)) { err = -ENOMEM; goto out; } /* We have found a record that are not reserved for next MFT. */ if (*rno >= MFT_REC_FREE) wnd_set_used(wnd, *rno, 1); else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) __set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); out: if (!mft) up_write(&wnd->rw_lock); return err; } /* * ntfs_mark_rec_free - Mark record as free. * is_mft - true if we are changing MFT */ void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft) { struct wnd_bitmap *wnd = &sbi->mft.bitmap; if (!is_mft) down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); if (rno >= wnd->nbits) goto out; if (rno >= MFT_REC_FREE) { if (!wnd_is_used(wnd, rno, 1)) ntfs_set_state(sbi, NTFS_DIRTY_ERROR); else wnd_set_free(wnd, rno, 1); } else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) { __clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); } if (rno < wnd_zone_bit(wnd)) wnd_zone_set(wnd, rno, 1); else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER) sbi->mft.next_free = rno; out: if (!is_mft) up_write(&wnd->rw_lock); } /* * ntfs_clear_mft_tail - Format empty records [from, to). * * sbi->mft.bitmap is locked for write. */ int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to) { int err; u32 rs; u64 vbo; struct runs_tree *run; struct ntfs_inode *ni; if (from >= to) return 0; rs = sbi->record_size; ni = sbi->mft.ni; run = &ni->file.run; down_read(&ni->file.run_lock); vbo = (u64)from * rs; for (; from < to; from++, vbo += rs) { struct ntfs_buffers nb; err = ntfs_get_bh(sbi, run, vbo, rs, &nb); if (err) goto out; err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0); nb_put(&nb); if (err) goto out; } out: sbi->mft.used = from; up_read(&ni->file.run_lock); return err; } /* * ntfs_refresh_zone - Refresh MFT zone. * * sbi->used.bitmap is locked for rw. * sbi->mft.bitmap is locked for write. * sbi->mft.ni->file.run_lock for write. */ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) { CLST lcn, vcn, len; size_t lcn_s, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; struct ntfs_inode *ni = sbi->mft.ni; /* Do not change anything unless we have non empty MFT zone. */ if (wnd_zone_len(wnd)) return 0; vcn = bytes_to_cluster(sbi, (u64)sbi->mft.bitmap.nbits << sbi->record_bits); if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL)) lcn = SPARSE_LCN; /* We should always find Last Lcn for MFT. */ if (lcn == SPARSE_LCN) return -EINVAL; lcn_s = lcn + 1; /* Try to allocate clusters after last MFT run. */ zlen = wnd_find(wnd, sbi->zone_max, lcn_s, 0, &lcn_s); wnd_zone_set(wnd, lcn_s, zlen); return 0; } /* * ntfs_update_mftmirr - Update $MFTMirr data. */ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) { int err; struct super_block *sb = sbi->sb; u32 blocksize, bytes; sector_t block1, block2; /* * sb can be NULL here. 
In this case sbi->flags should be 0 too. */ if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) || unlikely(ntfs3_forced_shutdown(sb))) return; blocksize = sb->s_blocksize; bytes = sbi->mft.recs_mirr << sbi->record_bits; block1 = sbi->mft.lbo >> sb->s_blocksize_bits; block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits; for (; bytes >= blocksize; bytes -= blocksize) { struct buffer_head *bh1, *bh2; bh1 = sb_bread(sb, block1++); if (!bh1) return; bh2 = sb_getblk(sb, block2++); if (!bh2) { put_bh(bh1); return; } if (buffer_locked(bh2)) __wait_on_buffer(bh2); lock_buffer(bh2); memcpy(bh2->b_data, bh1->b_data, blocksize); set_buffer_uptodate(bh2); mark_buffer_dirty(bh2); unlock_buffer(bh2); put_bh(bh1); bh1 = NULL; err = wait ? sync_dirty_buffer(bh2) : 0; put_bh(bh2); if (err) return; } sbi->flags &= ~NTFS_FLAGS_MFTMIRR; } /* * ntfs_bad_inode * * Marks inode as bad and marks fs as 'dirty' */ void ntfs_bad_inode(struct inode *inode, const char *hint) { struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; ntfs_inode_err(inode, "%s", hint); make_bad_inode(inode); /* Avoid recursion if bad inode is $Volume. */ if (inode->i_ino != MFT_REC_VOL && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) { ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } } /* * ntfs_set_state * * Mount: ntfs_set_state(NTFS_DIRTY_DIRTY) * Umount: ntfs_set_state(NTFS_DIRTY_CLEAR) * NTFS error: ntfs_set_state(NTFS_DIRTY_ERROR) */ int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty) { int err; struct ATTRIB *attr; struct VOLUME_INFO *info; struct mft_inode *mi; struct ntfs_inode *ni; __le16 info_flags; /* * Do not change state if fs was real_dirty. * Do not change state if fs already dirty(clear). * Do not change any thing if mounted read only. */ if (sbi->volume.real_dirty || sb_rdonly(sbi->sb)) return 0; /* Check cached value. */ if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) == (sbi->volume.flags & VOLUME_FLAG_DIRTY)) return 0; ni = sbi->volume.ni; if (!ni) return -EINVAL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY); attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi); if (!attr) { err = -EINVAL; goto out; } info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO); if (!info) { err = -EINVAL; goto out; } info_flags = info->flags; switch (dirty) { case NTFS_DIRTY_ERROR: ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors"); sbi->volume.real_dirty = true; fallthrough; case NTFS_DIRTY_DIRTY: info->flags |= VOLUME_FLAG_DIRTY; break; case NTFS_DIRTY_CLEAR: info->flags &= ~VOLUME_FLAG_DIRTY; break; } /* Cache current volume flags. */ if (info_flags != info->flags) { sbi->volume.flags = info->flags; mi->dirty = true; } err = 0; out: ni_unlock(ni); if (err) return err; mark_inode_dirty_sync(&ni->vfs_inode); /* verify(!ntfs_update_mftmirr()); */ /* write mft record on disk. */ err = _ni_write_inode(&ni->vfs_inode, 1); return err; } /* * security_hash - Calculates a hash of security descriptor. */ static inline __le32 security_hash(const void *sd, size_t bytes) { u32 hash = 0; const __le32 *ptr = sd; bytes >>= 2; while (bytes--) hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++); return cpu_to_le32(hash); } /* * simple wrapper for sb_bread_unmovable. 
*/ struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { struct ntfs_sb_info *sbi = sb->s_fs_info; struct buffer_head *bh; if (unlikely(block >= sbi->volume.blocks)) { /* prevent generic message "attempt to access beyond end of device" */ ntfs_err(sb, "try to read out of volume at offset 0x%llx", (u64)block << sb->s_blocksize_bits); return NULL; } bh = sb_bread_unmovable(sb, block); if (bh) return bh; ntfs_err(sb, "failed to read volume at offset 0x%llx", (u64)block << sb->s_blocksize_bits); return NULL; } int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; u32 blocksize = sb->s_blocksize; u64 block = lbo >> sb->s_blocksize_bits; u32 off = lbo & (blocksize - 1); u32 op = blocksize - off; for (; bytes; block += 1, off = 0, op = blocksize) { struct buffer_head *bh = __bread(bdev, block, blocksize); if (!bh) return -EIO; if (op > bytes) op = bytes; memcpy(buffer, bh->b_data + off, op); put_bh(bh); bytes -= op; buffer = Add2Ptr(buffer, op); } return 0; } int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, const void *buf, int wait) { u32 blocksize = sb->s_blocksize; struct block_device *bdev = sb->s_bdev; sector_t block = lbo >> sb->s_blocksize_bits; u32 off = lbo & (blocksize - 1); u32 op = blocksize - off; struct buffer_head *bh; if (!wait && (sb->s_flags & SB_SYNCHRONOUS)) wait = 1; for (; bytes; block += 1, off = 0, op = blocksize) { if (op > bytes) op = bytes; if (op < blocksize) { bh = __bread(bdev, block, blocksize); if (!bh) { ntfs_err(sb, "failed to read block %llx", (u64)block); return -EIO; } } else { bh = __getblk(bdev, block, blocksize); if (!bh) return -ENOMEM; } if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); if (buf) { memcpy(bh->b_data + off, buf, op); buf = Add2Ptr(buf, op); } else { memset(bh->b_data + off, -1, op); } set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); if (wait) { int err = sync_dirty_buffer(bh); if (err) { ntfs_err( sb, "failed to sync buffer at block %llx, error %d", (u64)block, err); put_bh(bh); return err; } } put_bh(bh); bytes -= op; } return 0; } int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, const void *buf, size_t bytes, int sync) { struct super_block *sb = sbi->sb; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next; u64 lbo, len; size_t idx; if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) return -ENOENT; if (lcn == SPARSE_LCN) return -EINVAL; lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; for (;;) { u32 op = min_t(u64, len, bytes); int err = ntfs_sb_write(sb, lbo, op, buf, sync); if (err) return err; bytes -= op; if (!bytes) break; vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) return -ENOENT; if (lcn == SPARSE_LCN) return -EINVAL; if (buf) buf = Add2Ptr(buf, op); lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } return 0; } struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo) { struct super_block *sb = sbi->sb; u8 cluster_bits = sbi->cluster_bits; CLST lcn; u64 lbo; if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL)) return ERR_PTR(-ENOENT); lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask); return ntfs_bread(sb, lbo >> sb->s_blocksize_bits); } int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, void *buf, 
u32 bytes, struct ntfs_buffers *nb) { int err; struct super_block *sb = sbi->sb; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; u32 nbh = 0; CLST vcn_next, vcn = vbo >> cluster_bits; CLST lcn, clen; u64 lbo, len; size_t idx; struct buffer_head *bh; if (!run) { /* First reading of $Volume + $MFTMirr + $LogFile goes here. */ if (vbo > MFT_REC_VOL * sbi->record_size) { err = -ENOENT; goto out; } /* Use absolute boot's 'MFTCluster' to read record. */ lbo = vbo + sbi->mft.lbo; len = sbi->record_size; } else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { err = -ENOENT; goto out; } else { if (lcn == SPARSE_LCN) { err = -EINVAL; goto out; } lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; } off = lbo & (blocksize - 1); if (nb) { nb->off = off; nb->bytes = bytes; } for (;;) { u32 len32 = len >= bytes ? bytes : len; sector_t block = lbo >> sb->s_blocksize_bits; do { u32 op = blocksize - off; if (op > len32) op = len32; bh = ntfs_bread(sb, block); if (!bh) { err = -EIO; goto out; } if (buf) { memcpy(buf, bh->b_data + off, op); buf = Add2Ptr(buf, op); } if (!nb) { put_bh(bh); } else if (nbh >= ARRAY_SIZE(nb->bh)) { err = -EINVAL; goto out; } else { nb->bh[nbh++] = bh; nb->nbufs = nbh; } bytes -= op; if (!bytes) return 0; len32 -= op; block += 1; off = 0; } while (len32); vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } if (lcn == SPARSE_LCN) { err = -EINVAL; goto out; } lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } out: if (!nbh) return err; while (nbh) { put_bh(nb->bh[--nbh]); nb->bh[nbh] = NULL; } nb->nbufs = 0; return err; } /* * ntfs_read_bh * * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups. 
*/ int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, struct ntfs_buffers *nb) { int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb); if (err) return err; return ntfs_fix_post_read(rhdr, nb->bytes, true); } int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u32 bytes, struct ntfs_buffers *nb) { int err = 0; struct super_block *sb = sbi->sb; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; CLST vcn_next, vcn = vbo >> cluster_bits; u32 off; u32 nbh = 0; CLST lcn, clen; u64 lbo, len; size_t idx; nb->bytes = bytes; if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { err = -ENOENT; goto out; } off = vbo & sbi->cluster_mask; lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; nb->off = off = lbo & (blocksize - 1); for (;;) { u32 len32 = min_t(u64, len, bytes); sector_t block = lbo >> sb->s_blocksize_bits; do { u32 op; struct buffer_head *bh; if (nbh >= ARRAY_SIZE(nb->bh)) { err = -EINVAL; goto out; } op = blocksize - off; if (op > len32) op = len32; if (op == blocksize) { bh = sb_getblk(sb, block); if (!bh) { err = -ENOMEM; goto out; } if (buffer_locked(bh)) __wait_on_buffer(bh); set_buffer_uptodate(bh); } else { bh = ntfs_bread(sb, block); if (!bh) { err = -EIO; goto out; } } nb->bh[nbh++] = bh; bytes -= op; if (!bytes) { nb->nbufs = nbh; return 0; } block += 1; len32 -= op; off = 0; } while (len32); vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } lbo = ((u64)lcn << cluster_bits); len = ((u64)clen << cluster_bits); } out: while (nbh) { put_bh(nb->bh[--nbh]); nb->bh[nbh] = NULL; } nb->nbufs = 0; return err; } int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, struct ntfs_buffers *nb, int sync) { int err = 0; struct super_block *sb = sbi->sb; u32 block_size = sb->s_blocksize; u32 bytes = nb->bytes; u32 off = nb->off; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); u32 idx; __le16 *fixup; __le16 sample; if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || fn * SECTOR_SIZE > bytes) { return -EINVAL; } for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) { u32 op = block_size - off; char *bh_data; struct buffer_head *bh = nb->bh[idx]; __le16 *ptr, *end_data; if (op > bytes) op = bytes; if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); bh_data = bh->b_data + off; end_data = Add2Ptr(bh_data, op); memcpy(bh_data, rhdr, op); if (!idx) { u16 t16; fixup = Add2Ptr(bh_data, fo); sample = *fixup; t16 = le16_to_cpu(sample); if (t16 >= 0x7FFF) { sample = *fixup = cpu_to_le16(1); } else { sample = cpu_to_le16(t16 + 1); *fixup = sample; } *(__le16 *)Add2Ptr(rhdr, fo) = sample; } ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short)); do { *++fixup = *ptr; *ptr = sample; ptr += SECTOR_SIZE / sizeof(short); } while (ptr < end_data); set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); if (sync) { int err2 = sync_dirty_buffer(bh); if (!err && err2) err = err2; } bytes -= op; rhdr = Add2Ptr(rhdr, op); } return err; } /* * ntfs_bio_pages - Read/write pages from/to disk. 
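 *
 * (Editor's note, describing the code below: the requested vbo/bytes range
 *  is first rounded out to 512-byte boundaries, then bios are built run by
 *  run and chained; the final bio is submitted with submit_bio_wait().)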
*/ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, enum req_op op) { int err = 0; struct bio *new, *bio = NULL; struct super_block *sb = sbi->sb; struct block_device *bdev = sb->s_bdev; struct page *page; u8 cluster_bits = sbi->cluster_bits; CLST lcn, clen, vcn, vcn_next; u32 add, off, page_idx; u64 lbo, len; size_t run_idx; struct blk_plug plug; if (!bytes) return 0; blk_start_plug(&plug); /* Align vbo and bytes to be 512 bytes aligned. */ lbo = (vbo + bytes + 511) & ~511ull; vbo = vbo & ~511ull; bytes = lbo - vbo; vcn = vbo >> cluster_bits; if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) { err = -ENOENT; goto out; } off = vbo & sbi->cluster_mask; page_idx = 0; page = pages[0]; for (;;) { lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; new_bio: new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; bio->bi_iter.bi_sector = lbo >> 9; while (len) { off = vbo & (PAGE_SIZE - 1); add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len; if (bio_add_page(bio, page, add, off) < add) goto new_bio; if (bytes <= add) goto out; bytes -= add; vbo += add; if (add + off == PAGE_SIZE) { page_idx += 1; if (WARN_ON(page_idx >= nr_pages)) { err = -EINVAL; goto out; } page = pages[page_idx]; } if (len <= add) break; len -= add; lbo += add; } vcn_next = vcn + clen; if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) || vcn != vcn_next) { err = -ENOENT; goto out; } off = 0; } out: if (bio) { if (!err) err = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return err; } /* * ntfs_bio_fill_1 - Helper for ntfs_loadlog_and_replay(). * * Fill on-disk logfile range by (-1) * this means empty logfile. */ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run) { int err = 0; struct super_block *sb = sbi->sb; struct block_device *bdev = sb->s_bdev; u8 cluster_bits = sbi->cluster_bits; struct bio *new, *bio = NULL; CLST lcn, clen; u64 lbo, len; size_t run_idx; struct page *fill; void *kaddr; struct blk_plug plug; fill = alloc_page(GFP_KERNEL); if (!fill) return -ENOMEM; kaddr = kmap_atomic(fill); memset(kaddr, -1, PAGE_SIZE); kunmap_atomic(kaddr); flush_dcache_page(fill); lock_page(fill); if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) { err = -ENOENT; goto out; } /* * TODO: Try blkdev_issue_write_same. */ blk_start_plug(&plug); do { lbo = (u64)lcn << cluster_bits; len = (u64)clen << cluster_bits; new_bio: new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS); if (bio) { bio_chain(bio, new); submit_bio(bio); } bio = new; bio->bi_iter.bi_sector = lbo >> 9; for (;;) { u32 add = len > PAGE_SIZE ? PAGE_SIZE : len; if (bio_add_page(bio, fill, add, 0) < add) goto new_bio; lbo += add; if (len <= add) break; len -= add; } } while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen)); if (!err) err = submit_bio_wait(bio); bio_put(bio); blk_finish_plug(&plug); out: unlock_page(fill); put_page(fill); return err; } int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u64 *lbo, u64 *bytes) { u32 off; CLST lcn, len; u8 cluster_bits = sbi->cluster_bits; if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL)) return -ENOENT; off = vbo & sbi->cluster_mask; *lbo = lcn == SPARSE_LCN ? 
-1 : (((u64)lcn << cluster_bits) + off); *bytes = ((u64)len << cluster_bits) - off; return 0; } struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, enum RECORD_FLAG flag) { int err = 0; struct super_block *sb = sbi->sb; struct inode *inode = new_inode(sb); struct ntfs_inode *ni; if (!inode) return ERR_PTR(-ENOMEM); ni = ntfs_i(inode); err = mi_format_new(&ni->mi, sbi, rno, flag, false); if (err) goto out; inode->i_ino = rno; if (insert_inode_locked(inode) < 0) { err = -EIO; goto out; } out: if (err) { make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } return ni; } /* * O:BAG:BAD:(A;OICI;FA;;;WD) * Owner S-1-5-32-544 (Administrators) * Group S-1-5-32-544 (Administrators) * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS */ const u8 s_default_security[] __aligned(8) = { 0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, }; static_assert(sizeof(s_default_security) == 0x50); static inline u32 sid_length(const struct SID *sid) { return struct_size(sid, SubAuthority, sid->SubAuthorityCount); } /* * is_acl_valid * * Thanks Mark Harmstone for idea. */ static bool is_acl_valid(const struct ACL *acl, u32 len) { const struct ACE_HEADER *ace; u32 i; u16 ace_count, ace_size; if (acl->AclRevision != ACL_REVISION && acl->AclRevision != ACL_REVISION_DS) { /* * This value should be ACL_REVISION, unless the ACL contains an * object-specific ACE, in which case this value must be ACL_REVISION_DS. * All ACEs in an ACL must be at the same revision level. 
*/ return false; } if (acl->Sbz1) return false; if (le16_to_cpu(acl->AclSize) > len) return false; if (acl->Sbz2) return false; len -= sizeof(struct ACL); ace = (struct ACE_HEADER *)&acl[1]; ace_count = le16_to_cpu(acl->AceCount); for (i = 0; i < ace_count; i++) { if (len < sizeof(struct ACE_HEADER)) return false; ace_size = le16_to_cpu(ace->AceSize); if (len < ace_size) return false; len -= ace_size; ace = Add2Ptr(ace, ace_size); } return true; } bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len) { u32 sd_owner, sd_group, sd_sacl, sd_dacl; if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE)) return false; if (sd->Revision != 1) return false; if (sd->Sbz1) return false; if (!(sd->Control & SE_SELF_RELATIVE)) return false; sd_owner = le32_to_cpu(sd->Owner); if (sd_owner) { const struct SID *owner = Add2Ptr(sd, sd_owner); if (sd_owner + offsetof(struct SID, SubAuthority) > len) return false; if (owner->Revision != 1) return false; if (sd_owner + sid_length(owner) > len) return false; } sd_group = le32_to_cpu(sd->Group); if (sd_group) { const struct SID *group = Add2Ptr(sd, sd_group); if (sd_group + offsetof(struct SID, SubAuthority) > len) return false; if (group->Revision != 1) return false; if (sd_group + sid_length(group) > len) return false; } sd_sacl = le32_to_cpu(sd->Sacl); if (sd_sacl) { const struct ACL *sacl = Add2Ptr(sd, sd_sacl); if (sd_sacl + sizeof(struct ACL) > len) return false; if (!is_acl_valid(sacl, len - sd_sacl)) return false; } sd_dacl = le32_to_cpu(sd->Dacl); if (sd_dacl) { const struct ACL *dacl = Add2Ptr(sd, sd_dacl); if (sd_dacl + sizeof(struct ACL) > len) return false; if (!is_acl_valid(dacl, len - sd_dacl)) return false; } return true; } /* * ntfs_security_init - Load and parse $Secure. */ int ntfs_security_init(struct ntfs_sb_info *sbi) { int err; struct super_block *sb = sbi->sb; struct inode *inode; struct ntfs_inode *ni; struct MFT_REF ref; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; u64 sds_size; size_t off; struct NTFS_DE *ne; struct NTFS_DE_SII *sii_e; struct ntfs_fnd *fnd_sii = NULL; const struct INDEX_ROOT *root_sii; const struct INDEX_ROOT *root_sdh; struct ntfs_index *indx_sdh = &sbi->security.index_sdh; struct ntfs_index *indx_sii = &sbi->security.index_sii; ref.low = cpu_to_le32(MFT_REC_SECURE); ref.high = 0; ref.seq = cpu_to_le16(MFT_REC_SECURE); inode = ntfs_iget5(sb, &ref, &NAME_SECURE); if (IS_ERR(inode)) { err = PTR_ERR(inode); ntfs_err(sb, "Failed to load $Secure (%d).", err); inode = NULL; goto out; } ni = ntfs_i(inode); le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME, ARRAY_SIZE(SDH_NAME), NULL, NULL); if (!attr || !(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root_sdh->ihdr.used) > le32_to_cpu(attr->res.data_size)) { ntfs_err(sb, "$Secure::$SDH is corrupted."); err = -EINVAL; goto out; } err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH); if (err) { ntfs_err(sb, "Failed to initialize $Secure::$SDH (%d).", err); goto out; } attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME, ARRAY_SIZE(SII_NAME), NULL, NULL); if (!attr || !(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root_sii->ihdr.used) > le32_to_cpu(attr->res.data_size)) { ntfs_err(sb, "$Secure::$SII is corrupted."); err = -EINVAL; goto out; } 
err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII); if (err) { ntfs_err(sb, "Failed to initialize $Secure::$SII (%d).", err); goto out; } fnd_sii = fnd_get(); if (!fnd_sii) { err = -ENOMEM; goto out; } sds_size = inode->i_size; /* Find the last valid Id. */ sbi->security.next_id = SECURITY_ID_FIRST; /* Always write new security at the end of bucket. */ sbi->security.next_off = ALIGN(sds_size - SecurityDescriptorsBlockSize, 16); off = 0; ne = NULL; for (;;) { u32 next_id; err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii); if (err || !ne) break; sii_e = (struct NTFS_DE_SII *)ne; if (le16_to_cpu(ne->view.data_size) < sizeof(sii_e->sec_hdr)) continue; next_id = le32_to_cpu(sii_e->sec_id) + 1; if (next_id >= sbi->security.next_id) sbi->security.next_id = next_id; } sbi->security.ni = ni; inode = NULL; out: iput(inode); fnd_put(fnd_sii); return err; } /* * ntfs_get_security_by_id - Read security descriptor by id. */ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, struct SECURITY_DESCRIPTOR_RELATIVE **sd, size_t *size) { int err; int diff; struct ntfs_inode *ni = sbi->security.ni; struct ntfs_index *indx = &sbi->security.index_sii; void *p = NULL; struct NTFS_DE_SII *sii_e; struct ntfs_fnd *fnd_sii; struct SECURITY_HDR d_security; const struct INDEX_ROOT *root_sii; u32 t32; *sd = NULL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); fnd_sii = fnd_get(); if (!fnd_sii) { err = -ENOMEM; goto out; } root_sii = indx_get_root(indx, ni, NULL, NULL); if (!root_sii) { err = -EINVAL; goto out; } /* Try to find this SECURITY descriptor in SII indexes. */ err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id), NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii); if (err) goto out; if (diff) goto out; t32 = le32_to_cpu(sii_e->sec_hdr.size); if (t32 < sizeof(struct SECURITY_HDR)) { err = -EINVAL; goto out; } if (t32 > sizeof(struct SECURITY_HDR) + 0x10000) { /* Looks like too big security. 0x10000 - is arbitrary big number. */ err = -EFBIG; goto out; } *size = t32 - sizeof(struct SECURITY_HDR); p = kmalloc(*size, GFP_NOFS); if (!p) { err = -ENOMEM; goto out; } err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(sii_e->sec_hdr.off), &d_security, sizeof(d_security), NULL); if (err) goto out; if (memcmp(&d_security, &sii_e->sec_hdr, sizeof(d_security))) { err = -EINVAL; goto out; } err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(sii_e->sec_hdr.off) + sizeof(struct SECURITY_HDR), p, *size, NULL); if (err) goto out; *sd = p; p = NULL; out: kfree(p); fnd_put(fnd_sii); ni_unlock(ni); return err; } /* * ntfs_insert_security - Insert security descriptor into $Secure::SDS. * * SECURITY Descriptor Stream data is organized into chunks of 256K bytes * and it contains a mirror copy of each security descriptor. When writing * to a security descriptor at location X, another copy will be written at * location (X+256K). * When writing a security descriptor that will cross the 256K boundary, * the pointer will be advanced by 256K to skip * over the mirror portion. 
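 *
 * (Editor's note, illustrative: with SecurityDescriptorsBlockSize == 256K
 *  this is simply
 *
 *	mirr_off = next_off + SecurityDescriptorsBlockSize;
 *
 *  e.g. a descriptor written at offset 0x10000 gets its mirror copy at
 *  0x50000, and the $SDS stream is grown to cover the mirror before the
 *  copy is written, as the code below does.)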
*/ int ntfs_insert_security(struct ntfs_sb_info *sbi, const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 size_sd, __le32 *security_id, bool *inserted) { int err, diff; struct ntfs_inode *ni = sbi->security.ni; struct ntfs_index *indx_sdh = &sbi->security.index_sdh; struct ntfs_index *indx_sii = &sbi->security.index_sii; struct NTFS_DE_SDH *e; struct NTFS_DE_SDH sdh_e; struct NTFS_DE_SII sii_e; struct SECURITY_HDR *d_security; u32 new_sec_size = size_sd + sizeof(struct SECURITY_HDR); u32 aligned_sec_size = ALIGN(new_sec_size, 16); struct SECURITY_KEY hash_key; struct ntfs_fnd *fnd_sdh = NULL; const struct INDEX_ROOT *root_sdh; const struct INDEX_ROOT *root_sii; u64 mirr_off, new_sds_size; u32 next, left; static_assert((1 << Log2OfSecurityDescriptorsBlockSize) == SecurityDescriptorsBlockSize); hash_key.hash = security_hash(sd, size_sd); hash_key.sec_id = SECURITY_ID_INVALID; if (inserted) *inserted = false; *security_id = SECURITY_ID_INVALID; /* Allocate a temporal buffer. */ d_security = kzalloc(aligned_sec_size, GFP_NOFS); if (!d_security) return -ENOMEM; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); fnd_sdh = fnd_get(); if (!fnd_sdh) { err = -ENOMEM; goto out; } root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL); if (!root_sdh) { err = -EINVAL; goto out; } root_sii = indx_get_root(indx_sii, ni, NULL, NULL); if (!root_sii) { err = -EINVAL; goto out; } /* * Check if such security already exists. * Use "SDH" and hash -> to get the offset in "SDS". */ err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key), &d_security->key.sec_id, &diff, (struct NTFS_DE **)&e, fnd_sdh); if (err) goto out; while (e) { if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) { err = ntfs_read_run_nb(sbi, &ni->file.run, le64_to_cpu(e->sec_hdr.off), d_security, new_sec_size, NULL); if (err) goto out; if (le32_to_cpu(d_security->size) == new_sec_size && d_security->key.hash == hash_key.hash && !memcmp(d_security + 1, sd, size_sd)) { /* Such security already exists. */ *security_id = d_security->key.sec_id; err = 0; goto out; } } err = indx_find_sort(indx_sdh, ni, root_sdh, (struct NTFS_DE **)&e, fnd_sdh); if (err) goto out; if (!e || e->key.hash != hash_key.hash) break; } /* Zero unused space. */ next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1); left = SecurityDescriptorsBlockSize - next; /* Zero gap until SecurityDescriptorsBlockSize. */ if (left < new_sec_size) { /* Zero "left" bytes from sbi->security.next_off. */ sbi->security.next_off += SecurityDescriptorsBlockSize + left; } /* Zero tail of previous security. */ //used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1); /* * Example: * 0x40438 == ni->vfs_inode.i_size * 0x00440 == sbi->security.next_off * need to zero [0x438-0x440) * if (next > used) { * u32 tozero = next - used; * zero "tozero" bytes from sbi->security.next_off - tozero */ /* Format new security descriptor. */ d_security->key.hash = hash_key.hash; d_security->key.sec_id = cpu_to_le32(sbi->security.next_id); d_security->off = cpu_to_le64(sbi->security.next_off); d_security->size = cpu_to_le32(new_sec_size); memcpy(d_security + 1, sd, size_sd); /* Write main SDS bucket. 
*/ err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off, d_security, aligned_sec_size, 0); if (err) goto out; mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize; new_sds_size = mirr_off + aligned_sec_size; if (new_sds_size > ni->vfs_inode.i_size) { err = attr_set_size(ni, ATTR_DATA, SDS_NAME, ARRAY_SIZE(SDS_NAME), &ni->file.run, new_sds_size, &new_sds_size, false, NULL); if (err) goto out; } /* Write copy SDS bucket. */ err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security, aligned_sec_size, 0); if (err) goto out; /* Fill SII entry. */ sii_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr)); sii_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sii_e.de.view.res = 0; sii_e.de.size = cpu_to_le16(sizeof(struct NTFS_DE_SII)); sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id)); sii_e.de.flags = 0; sii_e.de.res = 0; sii_e.sec_id = d_security->key.sec_id; memcpy(&sii_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0); if (err) goto out; /* Fill SDH entry. */ sdh_e.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr)); sdh_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR)); sdh_e.de.view.res = 0; sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY); sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key)); sdh_e.de.flags = 0; sdh_e.de.res = 0; sdh_e.key.hash = d_security->key.hash; sdh_e.key.sec_id = d_security->key.sec_id; memcpy(&sdh_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR)); sdh_e.magic[0] = cpu_to_le16('I'); sdh_e.magic[1] = cpu_to_le16('I'); fnd_clear(fnd_sdh); err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1, fnd_sdh, 0); if (err) goto out; *security_id = d_security->key.sec_id; if (inserted) *inserted = true; /* Update Id and offset for next descriptor. */ sbi->security.next_id += 1; sbi->security.next_off += aligned_sec_size; out: fnd_put(fnd_sdh); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); kfree(d_security); return err; } /* * ntfs_reparse_init - Load and parse $Extend/$Reparse. */ int ntfs_reparse_init(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; const struct INDEX_ROOT *root_r; if (!ni) return 0; le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME, ARRAY_SIZE(SR_NAME), NULL, NULL); if (!attr) { err = -EINVAL; goto out; } root_r = resident_data(attr); if (root_r->type != ATTR_ZERO || root_r->rule != NTFS_COLLATION_TYPE_UINTS) { err = -EINVAL; goto out; } err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR); if (err) goto out; out: return err; } /* * ntfs_objid_init - Load and parse $Extend/$ObjId. 
*/ int ntfs_objid_init(struct ntfs_sb_info *sbi) { int err; struct ntfs_inode *ni = sbi->objid.ni; struct ntfs_index *indx = &sbi->objid.index_o; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; const struct INDEX_ROOT *root; if (!ni) return 0; le = NULL; attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME, ARRAY_SIZE(SO_NAME), NULL, NULL); if (!attr) { err = -EINVAL; goto out; } root = resident_data(attr); if (root->type != ATTR_ZERO || root->rule != NTFS_COLLATION_TYPE_UINTS) { err = -EINVAL; goto out; } err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO); if (err) goto out; out: return err; } int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid) { int err; struct ntfs_inode *ni = sbi->objid.ni; struct ntfs_index *indx = &sbi->objid.index_o; if (!ni) return -EINVAL; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID); err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref) { int err; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct NTFS_DE_R re; if (!ni) return -EINVAL; memset(&re, 0, sizeof(re)); re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero)); re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R)); re.de.key_size = cpu_to_le16(sizeof(re.key)); re.key.ReparseTag = rtag; memcpy(&re.key.ref, ref, sizeof(*ref)); mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); err = indx_insert_entry(indx, ni, &re.de, NULL, NULL, 0); mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, const struct MFT_REF *ref) { int err, diff; struct ntfs_inode *ni = sbi->reparse.ni; struct ntfs_index *indx = &sbi->reparse.index_r; struct ntfs_fnd *fnd = NULL; struct REPARSE_KEY rkey; struct NTFS_DE_R *re; struct INDEX_ROOT *root_r; if (!ni) return -EINVAL; rkey.ReparseTag = rtag; rkey.ref = *ref; mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); if (rtag) { err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); goto out1; } fnd = fnd_get(); if (!fnd) { err = -ENOMEM; goto out1; } root_r = indx_get_root(indx, ni, NULL, NULL); if (!root_r) { err = -EINVAL; goto out; } /* 1 - forces to ignore rkey.ReparseTag when comparing keys. */ err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff, (struct NTFS_DE **)&re, fnd); if (err) goto out; if (memcmp(&re->key.ref, ref, sizeof(*ref))) { /* Impossible. Looks like volume corrupt? 
*/ goto out; } memcpy(&rkey, &re->key, sizeof(rkey)); fnd_put(fnd); fnd = NULL; err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); if (err) goto out; out: fnd_put(fnd); out1: mark_inode_dirty(&ni->vfs_inode); ni_unlock(ni); return err; } static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) { ntfs_unmap_meta(sbi->sb, lcn, len); ntfs_discard(sbi, lcn, len); } void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; bool dirty = false; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (!wnd_is_used(wnd, lcn, len)) { /* mark volume as dirty out of wnd->rw_lock */ dirty = true; end = lcn + len; len = 0; for (i = lcn; i < end; i++) { if (wnd_is_used(wnd, i, 1)) { if (!len) lcn = i; len += 1; continue; } if (!len) continue; if (trim) ntfs_unmap_and_discard(sbi, lcn, len); wnd_set_free(wnd, lcn, len); len = 0; } if (!len) goto out; } if (trim) ntfs_unmap_and_discard(sbi, lcn, len); wnd_set_free(wnd, lcn, len); /* append to MFT zone, if possible. */ zone_len = wnd_zone_len(wnd); zlen = min(zone_len + len, sbi->zone_max); if (zlen == zone_len) { /* MFT zone already has maximum size. */ } else if (!zone_len) { /* Create MFT zone only if 'zlen' is large enough. */ if (zlen == sbi->zone_max) wnd_zone_set(wnd, lcn, zlen); } else { CLST zone_lcn = wnd_zone_bit(wnd); if (lcn + len == zone_lcn) { /* Append into head MFT zone. */ wnd_zone_set(wnd, lcn, zlen); } else if (zone_lcn + zone_len == lcn) { /* Append into tail MFT zone. */ wnd_zone_set(wnd, zone_lcn, zlen); } } out: up_write(&wnd->rw_lock); if (dirty) ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* * run_deallocate - Deallocate clusters. */ int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run, bool trim) { CLST lcn, len; size_t idx = 0; while (run_get_entry(run, idx++, NULL, &lcn, &len)) { if (lcn == SPARSE_LCN) continue; mark_as_free_ex(sbi, lcn, len, trim); } return 0; } static inline bool name_has_forbidden_chars(const struct le_str *fname) { int i, ch; /* check for forbidden chars */ for (i = 0; i < fname->len; ++i) { ch = le16_to_cpu(fname->name[i]); /* control chars */ if (ch < 0x20) return true; switch (ch) { /* disallowed by Windows */ case '\\': case '/': case ':': case '*': case '?': case '<': case '>': case '|': case '\"': return true; default: /* allowed char */ break; } } /* file names cannot end with space or . 
*/ if (fname->len > 0) { ch = le16_to_cpu(fname->name[fname->len - 1]); if (ch == ' ' || ch == '.') return true; } return false; } static inline bool is_reserved_name(const struct ntfs_sb_info *sbi, const struct le_str *fname) { int port_digit; const __le16 *name = fname->name; int len = fname->len; const u16 *upcase = sbi->upcase; /* check for 3 chars reserved names (device names) */ /* name by itself or with any extension is forbidden */ if (len == 3 || (len > 3 && le16_to_cpu(name[3]) == '.')) if (!ntfs_cmp_names(name, 3, CON_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, NUL_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, AUX_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, PRN_NAME, 3, upcase, false)) return true; /* check for 4 chars reserved names (port name followed by 1..9) */ /* name by itself or with any extension is forbidden */ if (len == 4 || (len > 4 && le16_to_cpu(name[4]) == '.')) { port_digit = le16_to_cpu(name[3]); if (port_digit >= '1' && port_digit <= '9') if (!ntfs_cmp_names(name, 3, COM_NAME, 3, upcase, false) || !ntfs_cmp_names(name, 3, LPT_NAME, 3, upcase, false)) return true; } return false; } /* * valid_windows_name - Check if a file name is valid in Windows. */ bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *fname) { return !name_has_forbidden_chars(fname) && !is_reserved_name(sbi, fname); } /* * ntfs_set_label - updates current ntfs label. */ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) { int err; struct ATTRIB *attr; u32 uni_bytes; struct ntfs_inode *ni = sbi->volume.ni; /* Allocate PATH_MAX bytes. */ struct cpu_str *uni = __getname(); if (!uni) return -ENOMEM; err = ntfs_nls_to_utf16(sbi, label, len, uni, (PATH_MAX - 2) / 2, UTF16_LITTLE_ENDIAN); if (err < 0) goto out; uni_bytes = uni->len * sizeof(u16); if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) { ntfs_warn(sbi->sb, "new label is too long"); err = -EFBIG; goto out; } ni_lock(ni); /* Ignore any errors. */ ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL); err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr, NULL, NULL); if (err < 0) goto unlock_out; /* write new label in on-disk struct. */ memcpy(resident_data(attr), uni->name, uni_bytes); /* update cached value of current label. */ if (len >= ARRAY_SIZE(sbi->volume.label)) len = ARRAY_SIZE(sbi->volume.label) - 1; memcpy(sbi->volume.label, label, len); sbi->volume.label[len] = 0; mark_inode_dirty_sync(&ni->vfs_inode); unlock_out: ni_unlock(ni); if (!err) err = _ni_write_inode(&ni->vfs_inode, 0); out: __putname(uni); return err; } |
// SPDX-License-Identifier: GPL-2.0-or-later
/* mpih-rshift.c - MPI helper functions
 * Copyright (C) 1994, 1996, 1998, 1999,
 *               2000, 2001 Free Software Foundation, Inc.
 *
 * This file is part of GNUPG
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing of sensitive data due to paging etc.
 *	 The GNU MP Library itself is published under the LGPL;
 *	 however I decided to publish this code under the plain GPL.
 */

#include "mpi-internal.h"

/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
 * and store the USIZE least significant limbs of the result at WP.
 * The bits shifted out to the right are returned.
 *
 * Argument constraints:
 * 1. 0 < CNT < BITS_PER_MP_LIMB
 * 2. If the result is to be written over the input, WP must be <= UP.
 */
mpi_limb_t
mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
{
	mpi_limb_t high_limb, low_limb;
	unsigned sh_1, sh_2;
	mpi_size_t i;
	mpi_limb_t retval;

	sh_1 = cnt;
	wp -= 1;
	sh_2 = BITS_PER_MPI_LIMB - sh_1;
	high_limb = up[0];
	retval = high_limb << sh_2;
	low_limb = high_limb;
	for (i = 1; i < usize; i++) {
		high_limb = up[i];
		wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
		low_limb = high_limb;
	}
	wp[i] = low_limb >> sh_1;

	return retval;
}
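/*
 * Editor's addition (illustrative sketch, not part of the file above): a
 * worked example of the contract documented for mpihelp_rshift(), assuming
 * 64-bit limbs (BITS_PER_MPI_LIMB == 64) and the usual
 * least-significant-limb-first storage. The helper name below is made up
 * for the example.
 */
static mpi_limb_t __maybe_unused example_rshift_by_one(void)
{
	mpi_limb_t u[2] = { 1, 1 };	/* the value 2^64 + 1 */
	mpi_limb_t w[2];
	mpi_limb_t dropped;

	/* Shift right by one bit: w becomes { 1UL << 63, 0 }, i.e. 2^63. */
	dropped = mpihelp_rshift(w, u, 2, 1);

	/* The low bit that was shifted out comes back in the top bit. */
	return dropped;		/* == (mpi_limb_t)1 << 63 */
}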
46 46 25 5 21 36 15 24 27 29 29 2 2 2 2 64 2 6 4 2 2 2 2 17 18 57 25 30 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * seq_oss_readq.c - MIDI input queue * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_readq.h" #include "seq_oss_event.h" #include <sound/seq_oss_legacy.h> #include "../seq_lock.h" #include <linux/wait.h> #include <linux/slab.h> /* * constants */ //#define SNDRV_SEQ_OSS_MAX_TIMEOUT (unsigned long)(-1) #define SNDRV_SEQ_OSS_MAX_TIMEOUT (HZ * 3600) /* * prototypes */ /* * create a read queue */ struct seq_oss_readq * snd_seq_oss_readq_new(struct seq_oss_devinfo *dp, int maxlen) { struct seq_oss_readq *q; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->q = kcalloc(maxlen, sizeof(union evrec), GFP_KERNEL); if (!q->q) { kfree(q); return NULL; } q->maxlen = maxlen; q->qlen = 0; q->head = q->tail = 0; init_waitqueue_head(&q->midi_sleep); spin_lock_init(&q->lock); q->pre_event_timeout = SNDRV_SEQ_OSS_MAX_TIMEOUT; q->input_time = (unsigned long)-1; return q; } /* * delete the read queue */ void snd_seq_oss_readq_delete(struct seq_oss_readq *q) { if (q) { kfree(q->q); kfree(q); } } /* * reset the read queue */ void snd_seq_oss_readq_clear(struct seq_oss_readq *q) { if (q->qlen) { q->qlen = 0; q->head = q->tail = 0; } /* if someone sleeping, wake'em up */ wake_up(&q->midi_sleep); q->input_time = (unsigned long)-1; } /* * put a midi byte */ int snd_seq_oss_readq_puts(struct seq_oss_readq *q, int dev, unsigned char *data, int len) { union evrec rec; int result; memset(&rec, 0, sizeof(rec)); rec.c[0] = SEQ_MIDIPUTC; rec.c[2] = dev; while (len-- > 0) { rec.c[1] = *data++; result = snd_seq_oss_readq_put_event(q, &rec); if (result < 0) return result; } return 0; } /* * put MIDI sysex bytes; the event buffer may be chained, thus it has * to be expanded via snd_seq_dump_var_event(). 
*/ struct readq_sysex_ctx { struct seq_oss_readq *readq; int dev; }; static int readq_dump_sysex(void *ptr, void *buf, int count) { struct readq_sysex_ctx *ctx = ptr; return snd_seq_oss_readq_puts(ctx->readq, ctx->dev, buf, count); } int snd_seq_oss_readq_sysex(struct seq_oss_readq *q, int dev, struct snd_seq_event *ev) { struct readq_sysex_ctx ctx = { .readq = q, .dev = dev }; if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) return 0; return snd_seq_dump_var_event(ev, readq_dump_sysex, &ctx); } /* * copy an event to input queue: * return zero if enqueued */ int snd_seq_oss_readq_put_event(struct seq_oss_readq *q, union evrec *ev) { unsigned long flags; spin_lock_irqsave(&q->lock, flags); if (q->qlen >= q->maxlen - 1) { spin_unlock_irqrestore(&q->lock, flags); return -ENOMEM; } memcpy(&q->q[q->tail], ev, sizeof(*ev)); q->tail = (q->tail + 1) % q->maxlen; q->qlen++; /* wake up sleeper */ wake_up(&q->midi_sleep); spin_unlock_irqrestore(&q->lock, flags); return 0; } /* * pop queue * caller must hold lock */ int snd_seq_oss_readq_pick(struct seq_oss_readq *q, union evrec *rec) { if (q->qlen == 0) return -EAGAIN; memcpy(rec, &q->q[q->head], sizeof(*rec)); return 0; } /* * sleep until ready */ void snd_seq_oss_readq_wait(struct seq_oss_readq *q) { wait_event_interruptible_timeout(q->midi_sleep, (q->qlen > 0 || q->head == q->tail), q->pre_event_timeout); } /* * drain one record * caller must hold lock */ void snd_seq_oss_readq_free(struct seq_oss_readq *q) { if (q->qlen > 0) { q->head = (q->head + 1) % q->maxlen; q->qlen--; } } /* * polling/select: * return non-zero if readq is not empty. */ unsigned int snd_seq_oss_readq_poll(struct seq_oss_readq *q, struct file *file, poll_table *wait) { poll_wait(file, &q->midi_sleep, wait); return q->qlen; } /* * put a timestamp */ int snd_seq_oss_readq_put_timestamp(struct seq_oss_readq *q, unsigned long curt, int seq_mode) { if (curt != q->input_time) { union evrec rec; memset(&rec, 0, sizeof(rec)); switch (seq_mode) { case SNDRV_SEQ_OSS_MODE_SYNTH: rec.echo = (curt << 8) | SEQ_WAIT; snd_seq_oss_readq_put_event(q, &rec); break; case SNDRV_SEQ_OSS_MODE_MUSIC: rec.t.code = EV_TIMING; rec.t.cmd = TMR_WAIT_ABS; rec.t.time = curt; snd_seq_oss_readq_put_event(q, &rec); break; } q->input_time = curt; } return 0; } #ifdef CONFIG_SND_PROC_FS /* * proc interface */ void snd_seq_oss_readq_info_read(struct seq_oss_readq *q, struct snd_info_buffer *buf) { snd_iprintf(buf, " read queue [%s] length = %d : tick = %ld\n", (waitqueue_active(&q->midi_sleep) ? "sleeping":"running"), q->qlen, q->input_time); } #endif /* CONFIG_SND_PROC_FS */ |
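/*
 * Editor's addition (illustrative sketch, not part of the file above): the
 * comments on snd_seq_oss_readq_pick() and snd_seq_oss_readq_free() say the
 * caller must hold the queue lock, so a consumer would look roughly like
 * this hypothetical helper.
 */
static int __maybe_unused example_readq_consume_one(struct seq_oss_readq *q,
						    union evrec *rec)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&q->lock, flags);
	err = snd_seq_oss_readq_pick(q, rec);	/* copy the head record */
	if (!err)
		snd_seq_oss_readq_free(q);	/* then drop it from the queue */
	spin_unlock_irqrestore(&q->lock, flags);

	return err;
}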
// SPDX-License-Identifier: GPL-2.0-or-later /* * DVB USB Linux driver for Anysee E30 DVB-C & DVB-T USB2.0 receiver * * Copyright (C) 2007 Antti Palosaari <crope@iki.fi> * * TODO: * - add smart card reader support for Conditional Access (CA) * * The card reader in Anysee is nothing more than an ISO 7816 card reader. * There is no hardware CAM in any Anysee device sold. * In my understanding it should be implemented as a module of its own * for ISO 7816 card readers, in the same way dvb_ca_en50221 is implemented. Such a * module registers a serial interface that can be used to communicate * with any ISO 7816 smart card. * * Any help with implementing serial smart card reader support * is highly welcome!
*/ #include "anysee.h" #include "dvb-pll.h" #include "tda1002x.h" #include "mt352.h" #include "mt352_priv.h" #include "zl10353.h" #include "tda18212.h" #include "cx24116.h" #include "stv0900.h" #include "stv6110.h" #include "isl6423.h" #include "cxd2820r.h" DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int anysee_ctrl_msg(struct dvb_usb_device *d, u8 *sbuf, u8 slen, u8 *rbuf, u8 rlen) { struct anysee_state *state = d_to_priv(d); int act_len, ret, i; mutex_lock(&d->usb_mutex); memcpy(&state->buf[0], sbuf, slen); state->buf[60] = state->seq++; dev_dbg(&d->udev->dev, "%s: >>> %*ph\n", __func__, slen, state->buf); /* * We need receive one message more after dvb_usbv2_generic_rw_locked() * due to weird transaction flow, which is 1 x send + 2 x receive. */ ret = dvb_usbv2_generic_rw_locked(d, state->buf, sizeof(state->buf), state->buf, sizeof(state->buf)); if (ret) goto error_unlock; /* get answer, retry few times if error returned */ for (i = 0; i < 3; i++) { /* receive 2nd answer */ ret = usb_bulk_msg(d->udev, usb_rcvbulkpipe(d->udev, d->props->generic_bulk_ctrl_endpoint), state->buf, sizeof(state->buf), &act_len, 2000); if (ret) { dev_dbg(&d->udev->dev, "%s: recv bulk message failed=%d\n", __func__, ret); } else { dev_dbg(&d->udev->dev, "%s: <<< %*ph\n", __func__, rlen, state->buf); if (state->buf[63] != 0x4f) dev_dbg(&d->udev->dev, "%s: cmd failed\n", __func__); break; } } if (ret) { /* all retries failed, it is fatal */ dev_err(&d->udev->dev, "%s: recv bulk message failed=%d\n", KBUILD_MODNAME, ret); goto error_unlock; } /* read request, copy returned data to return buf */ if (rbuf && rlen) memcpy(rbuf, state->buf, rlen); error_unlock: mutex_unlock(&d->usb_mutex); return ret; } static int anysee_read_reg(struct dvb_usb_device *d, u16 reg, u8 *val) { u8 buf[] = {CMD_REG_READ, reg >> 8, reg & 0xff, 0x01}; int ret; ret = anysee_ctrl_msg(d, buf, sizeof(buf), val, 1); dev_dbg(&d->udev->dev, "%s: reg=%04x val=%02x\n", __func__, reg, *val); return ret; } static int anysee_write_reg(struct dvb_usb_device *d, u16 reg, u8 val) { u8 buf[] = {CMD_REG_WRITE, reg >> 8, reg & 0xff, 0x01, val}; dev_dbg(&d->udev->dev, "%s: reg=%04x val=%02x\n", __func__, reg, val); return anysee_ctrl_msg(d, buf, sizeof(buf), NULL, 0); } /* write single register with mask */ static int anysee_wr_reg_mask(struct dvb_usb_device *d, u16 reg, u8 val, u8 mask) { int ret; u8 tmp; /* no need for read if whole reg is written */ if (mask != 0xff) { ret = anysee_read_reg(d, reg, &tmp); if (ret) return ret; val &= mask; tmp &= ~mask; val |= tmp; } return anysee_write_reg(d, reg, val); } /* read single register with mask */ static int anysee_rd_reg_mask(struct dvb_usb_device *d, u16 reg, u8 *val, u8 mask) { int ret, i; u8 tmp; ret = anysee_read_reg(d, reg, &tmp); if (ret) return ret; tmp &= mask; /* find position of the first bit */ for (i = 0; i < 8; i++) { if ((mask >> i) & 0x01) break; } *val = tmp >> i; return 0; } static int anysee_get_hw_info(struct dvb_usb_device *d, u8 *id) { u8 buf[] = {CMD_GET_HW_INFO}; return anysee_ctrl_msg(d, buf, sizeof(buf), id, 3); } static int anysee_streaming_ctrl(struct dvb_frontend *fe, int onoff) { u8 buf[] = {CMD_STREAMING_CTRL, (u8)onoff, 0x00}; dev_dbg(&fe_to_d(fe)->udev->dev, "%s: onoff=%d\n", __func__, onoff); return anysee_ctrl_msg(fe_to_d(fe), buf, sizeof(buf), NULL, 0); } static int anysee_led_ctrl(struct dvb_usb_device *d, u8 mode, u8 interval) { u8 buf[] = {CMD_LED_AND_IR_CTRL, 0x01, mode, interval}; dev_dbg(&d->udev->dev, "%s: state=%d interval=%d\n", __func__, mode, interval); return 
anysee_ctrl_msg(d, buf, sizeof(buf), NULL, 0); } static int anysee_ir_ctrl(struct dvb_usb_device *d, u8 onoff) { u8 buf[] = {CMD_LED_AND_IR_CTRL, 0x02, onoff}; dev_dbg(&d->udev->dev, "%s: onoff=%d\n", __func__, onoff); return anysee_ctrl_msg(d, buf, sizeof(buf), NULL, 0); } /* I2C */ static int anysee_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); int ret = 0, inc, i = 0; u8 buf[52]; /* 4 + 48 (I2C WR USB command header + I2C WR max) */ if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; while (i < num) { if (num > i + 1 && (msg[i+1].flags & I2C_M_RD)) { if (msg[i].len < 1 || msg[i].len > 2 || msg[i + 1].len > 60) { ret = -EOPNOTSUPP; break; } buf[0] = CMD_I2C_READ; buf[1] = (msg[i].addr << 1) | 0x01; buf[2] = msg[i].buf[0]; buf[3] = (msg[i].len < 2) ? 0 : msg[i].buf[1]; buf[4] = msg[i].len-1; buf[5] = msg[i+1].len; ret = anysee_ctrl_msg(d, buf, 6, msg[i+1].buf, msg[i+1].len); inc = 2; } else { if (msg[i].len > 48) { ret = -EOPNOTSUPP; break; } buf[0] = CMD_I2C_WRITE; buf[1] = (msg[i].addr << 1); buf[2] = msg[i].len; buf[3] = 0x01; memcpy(&buf[4], msg[i].buf, msg[i].len); ret = anysee_ctrl_msg(d, buf, 4 + msg[i].len, NULL, 0); inc = 1; } if (ret) break; i += inc; } mutex_unlock(&d->i2c_mutex); return ret ? ret : i; } static u32 anysee_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm anysee_i2c_algo = { .master_xfer = anysee_master_xfer, .functionality = anysee_i2c_func, }; static int anysee_mt352_demod_init(struct dvb_frontend *fe) { static u8 clock_config[] = { CLOCK_CTL, 0x38, 0x28 }; static u8 reset[] = { RESET, 0x80 }; static u8 adc_ctl_1_cfg[] = { ADC_CTL_1, 0x40 }; static u8 agc_cfg[] = { AGC_TARGET, 0x28, 0x20 }; static u8 gpp_ctl_cfg[] = { GPP_CTL, 0x33 }; static u8 capt_range_cfg[] = { CAPT_RANGE, 0x32 }; mt352_write(fe, clock_config, sizeof(clock_config)); udelay(200); mt352_write(fe, reset, sizeof(reset)); mt352_write(fe, adc_ctl_1_cfg, sizeof(adc_ctl_1_cfg)); mt352_write(fe, agc_cfg, sizeof(agc_cfg)); mt352_write(fe, gpp_ctl_cfg, sizeof(gpp_ctl_cfg)); mt352_write(fe, capt_range_cfg, sizeof(capt_range_cfg)); return 0; } /* Callbacks for DVB USB */ static struct tda10023_config anysee_tda10023_config = { .demod_address = (0x1a >> 1), .invert = 0, .xtal = 16000000, .pll_m = 11, .pll_p = 3, .pll_n = 1, .output_mode = TDA10023_OUTPUT_MODE_PARALLEL_C, .deltaf = 0xfeeb, }; static struct mt352_config anysee_mt352_config = { .demod_address = (0x1e >> 1), .demod_init = anysee_mt352_demod_init, }; static struct zl10353_config anysee_zl10353_config = { .demod_address = (0x1e >> 1), .parallel_ts = 1, }; static struct zl10353_config anysee_zl10353_tda18212_config2 = { .demod_address = (0x1e >> 1), .parallel_ts = 1, .disable_i2c_gate_ctrl = 1, .no_tuner = 1, .if2 = 41500, }; static struct zl10353_config anysee_zl10353_tda18212_config = { .demod_address = (0x18 >> 1), .parallel_ts = 1, .disable_i2c_gate_ctrl = 1, .no_tuner = 1, .if2 = 41500, }; static struct tda10023_config anysee_tda10023_tda18212_config = { .demod_address = (0x1a >> 1), .xtal = 16000000, .pll_m = 12, .pll_p = 3, .pll_n = 1, .output_mode = TDA10023_OUTPUT_MODE_PARALLEL_B, .deltaf = 0xba02, }; static const struct tda18212_config anysee_tda18212_config = { .if_dvbt_6 = 4150, .if_dvbt_7 = 4150, .if_dvbt_8 = 4150, .if_dvbc = 5000, }; static const struct tda18212_config anysee_tda18212_config2 = { .if_dvbt_6 = 3550, .if_dvbt_7 = 3700, .if_dvbt_8 = 4150, .if_dvbt2_6 = 3250, .if_dvbt2_7 = 4000, .if_dvbt2_8 
= 4000, .if_dvbc = 5000, }; static struct cx24116_config anysee_cx24116_config = { .demod_address = (0xaa >> 1), .mpg_clk_pos_pol = 0x00, .i2c_wr_max = 48, }; static struct stv0900_config anysee_stv0900_config = { .demod_address = (0xd0 >> 1), .demod_mode = 0, .xtal = 8000000, .clkmode = 3, .diseqc_mode = 2, .tun1_maddress = 0, .tun1_adc = 1, /* 1 Vpp */ .path1_mode = 3, }; static struct stv6110_config anysee_stv6110_config = { .i2c_address = (0xc0 >> 1), .mclk = 16000000, .clk_div = 1, }; static struct isl6423_config anysee_isl6423_config = { .current_max = SEC_CURRENT_800m, .curlim = SEC_CURRENT_LIM_OFF, .mod_extern = 1, .addr = (0x10 >> 1), }; static struct cxd2820r_config anysee_cxd2820r_config = { .i2c_address = 0x6d, /* (0xda >> 1) */ .ts_mode = 0x38, }; /* * New USB device strings: Mfr=1, Product=2, SerialNumber=0 * Manufacturer: AMT.CO.KR * * E30 VID=04b4 PID=861f HW=2 FW=2.1 Product=???????? * PCB: ? * parts: DNOS404ZH102A(MT352, DTT7579(?)) * * E30 VID=04b4 PID=861f HW=2 FW=2.1 "anysee-T(LP)" * PCB: PCB 507T (rev1.61) * parts: DNOS404ZH103A(ZL10353, DTT7579(?)) * OEA=0a OEB=00 OEC=00 OED=ff OEE=00 * IOA=45 IOB=ff IOC=00 IOD=ff IOE=00 * * E30 Plus VID=04b4 PID=861f HW=6 FW=1.0 "anysee" * PCB: 507CD (rev1.1) * parts: DNOS404ZH103A(ZL10353, DTT7579(?)), CST56I01 * OEA=80 OEB=00 OEC=00 OED=ff OEE=fe * IOA=4f IOB=ff IOC=00 IOD=06 IOE=01 * IOD[0] ZL10353 1=enabled * IOA[7] TS 0=enabled * tuner is not behind ZL10353 I2C-gate (no care if gate disabled or not) * * E30 C Plus VID=04b4 PID=861f HW=10 FW=1.0 "anysee-DC(LP)" * PCB: 507DC (rev0.2) * parts: TDA10023, DTOS403IH102B TM, CST56I01 * OEA=80 OEB=00 OEC=00 OED=ff OEE=fe * IOA=4f IOB=ff IOC=00 IOD=26 IOE=01 * IOD[0] TDA10023 1=enabled * * E30 S2 Plus VID=04b4 PID=861f HW=11 FW=0.1 "anysee-S2(LP)" * PCB: 507SI (rev2.1) * parts: BS2N10WCC01(CX24116, CX24118), ISL6423, TDA8024 * OEA=80 OEB=00 OEC=ff OED=ff OEE=fe * IOA=4d IOB=ff IOC=00 IOD=26 IOE=01 * IOD[0] CX24116 1=enabled * * E30 C Plus VID=1c73 PID=861f HW=15 FW=1.2 "anysee-FA(LP)" * PCB: 507FA (rev0.4) * parts: TDA10023, DTOS403IH102B TM, TDA8024 * OEA=80 OEB=00 OEC=ff OED=ff OEE=ff * IOA=4d IOB=ff IOC=00 IOD=00 IOE=c0 * IOD[5] TDA10023 1=enabled * IOE[0] tuner 1=enabled * * E30 Combo Plus VID=1c73 PID=861f HW=15 FW=1.2 "anysee-FA(LP)" * PCB: 507FA (rev1.1) * parts: ZL10353, TDA10023, DTOS403IH102B TM, TDA8024 * OEA=80 OEB=00 OEC=ff OED=ff OEE=ff * IOA=4d IOB=ff IOC=00 IOD=00 IOE=c0 * DVB-C: * IOD[5] TDA10023 1=enabled * IOE[0] tuner 1=enabled * DVB-T: * IOD[0] ZL10353 1=enabled * IOE[0] tuner 0=enabled * tuner is behind ZL10353 I2C-gate * tuner is behind TDA10023 I2C-gate * * E7 TC VID=1c73 PID=861f HW=18 FW=0.7 AMTCI=0.5 "anysee-E7TC(LP)" * PCB: 508TC (rev0.6) * parts: ZL10353, TDA10023, DNOD44CDH086A(TDA18212) * OEA=80 OEB=00 OEC=03 OED=f7 OEE=ff * IOA=4d IOB=00 IOC=cc IOD=48 IOE=e4 * IOA[7] TS 1=enabled * IOE[4] TDA18212 1=enabled * DVB-C: * IOD[6] ZL10353 0=disabled * IOD[5] TDA10023 1=enabled * IOE[0] IF 1=enabled * DVB-T: * IOD[5] TDA10023 0=disabled * IOD[6] ZL10353 1=enabled * IOE[0] IF 0=enabled * * E7 S2 VID=1c73 PID=861f HW=19 FW=0.4 AMTCI=0.5 "anysee-E7S2(LP)" * PCB: 508S2 (rev0.7) * parts: DNBU10512IST(STV0903, STV6110), ISL6423 * OEA=80 OEB=00 OEC=03 OED=f7 OEE=ff * IOA=4d IOB=00 IOC=c4 IOD=08 IOE=e4 * IOA[7] TS 1=enabled * IOE[5] STV0903 1=enabled * * E7 T2C VID=1c73 PID=861f HW=20 FW=0.1 AMTCI=0.5 "anysee-E7T2C(LP)" * PCB: 508T2C (rev0.3) * parts: DNOQ44QCH106A(CXD2820R, TDA18212), TDA8024 * OEA=80 OEB=00 OEC=03 OED=f7 OEE=ff * IOA=4d IOB=00 IOC=cc IOD=48 IOE=e4 * 
IOA[7] TS 1=enabled * IOE[5] CXD2820R 1=enabled * * E7 PTC VID=1c73 PID=861f HW=21 FW=0.1 AMTCI=?? "anysee-E7PTC(LP)" * PCB: 508PTC (rev0.5) * parts: ZL10353, TDA10023, DNOD44CDH086A(TDA18212) * OEA=80 OEB=00 OEC=03 OED=f7 OEE=ff * IOA=4d IOB=00 IOC=cc IOD=48 IOE=e4 * IOA[7] TS 1=enabled * IOE[4] TDA18212 1=enabled * DVB-C: * IOD[6] ZL10353 0=disabled * IOD[5] TDA10023 1=enabled * IOE[0] IF 1=enabled * DVB-T: * IOD[5] TDA10023 0=disabled * IOD[6] ZL10353 1=enabled * IOE[0] IF 0=enabled * * E7 PS2 VID=1c73 PID=861f HW=22 FW=0.1 AMTCI=?? "anysee-E7PS2(LP)" * PCB: 508PS2 (rev0.4) * parts: DNBU10512IST(STV0903, STV6110), ISL6423 * OEA=80 OEB=00 OEC=03 OED=f7 OEE=ff * IOA=4d IOB=00 IOC=c4 IOD=08 IOE=e4 * IOA[7] TS 1=enabled * IOE[5] STV0903 1=enabled */ static int anysee_read_config(struct dvb_usb_device *d) { struct anysee_state *state = d_to_priv(d); int ret; u8 hw_info[3]; /* * Check which hardware we have. * We must do this call two times to get reliable values (hw/fw bug). */ ret = anysee_get_hw_info(d, hw_info); if (ret) goto error; ret = anysee_get_hw_info(d, hw_info); if (ret) goto error; /* * Meaning of these info bytes are guessed. */ dev_info(&d->udev->dev, "%s: firmware version %d.%d hardware id %d\n", KBUILD_MODNAME, hw_info[1], hw_info[2], hw_info[0]); state->hw = hw_info[0]; error: return ret; } /* external I2C gate used for DNOD44CDH086A(TDA18212) tuner module */ static int anysee_i2c_gate_ctrl(struct dvb_frontend *fe, int enable) { /* enable / disable tuner access on IOE[4] */ return anysee_wr_reg_mask(fe_to_d(fe), REG_IOE, (enable << 4), 0x10); } static int anysee_frontend_ctrl(struct dvb_frontend *fe, int onoff) { struct anysee_state *state = fe_to_priv(fe); struct dvb_usb_device *d = fe_to_d(fe); int ret; dev_dbg(&d->udev->dev, "%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); /* no frontend sleep control */ if (onoff == 0) return 0; switch (state->hw) { case ANYSEE_HW_507FA: /* 15 */ /* E30 Combo Plus */ /* E30 C Plus */ if (fe->id == 0) { /* disable DVB-T demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 0), 0x01); if (ret) goto error; /* enable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 5), 0x20); if (ret) goto error; /* enable DVB-C tuner on IOE[0] */ ret = anysee_wr_reg_mask(d, REG_IOE, (1 << 0), 0x01); if (ret) goto error; } else { /* disable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 5), 0x20); if (ret) goto error; /* enable DVB-T demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 0), 0x01); if (ret) goto error; /* enable DVB-T tuner on IOE[0] */ ret = anysee_wr_reg_mask(d, REG_IOE, (0 << 0), 0x01); if (ret) goto error; } break; case ANYSEE_HW_508TC: /* 18 */ case ANYSEE_HW_508PTC: /* 21 */ /* E7 TC */ /* E7 PTC */ if (fe->id == 0) { /* disable DVB-T demod on IOD[6] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 6), 0x40); if (ret) goto error; /* enable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 5), 0x20); if (ret) goto error; /* enable IF route on IOE[0] */ ret = anysee_wr_reg_mask(d, REG_IOE, (1 << 0), 0x01); if (ret) goto error; } else { /* disable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 5), 0x20); if (ret) goto error; /* enable DVB-T demod on IOD[6] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 6), 0x40); if (ret) goto error; /* enable IF route on IOE[0] */ ret = anysee_wr_reg_mask(d, REG_IOE, (0 << 0), 0x01); if (ret) goto error; } break; default: ret = 0; } error: return ret; } static int anysee_add_i2c_dev(struct dvb_usb_device *d, const 
char *type, u8 addr, void *platform_data) { int ret, num; struct anysee_state *state = d_to_priv(d); struct i2c_client *client; struct i2c_adapter *adapter = &d->i2c_adap; struct i2c_board_info board_info = { .addr = addr, .platform_data = platform_data, }; strscpy(board_info.type, type, I2C_NAME_SIZE); /* find first free client */ for (num = 0; num < ANYSEE_I2C_CLIENT_MAX; num++) { if (state->i2c_client[num] == NULL) break; } dev_dbg(&d->udev->dev, "%s: num=%d\n", __func__, num); if (num == ANYSEE_I2C_CLIENT_MAX) { dev_err(&d->udev->dev, "%s: I2C client out of index\n", KBUILD_MODNAME); ret = -ENODEV; goto err; } request_module("%s", board_info.type); /* register I2C device */ client = i2c_new_client_device(adapter, &board_info); if (!i2c_client_has_driver(client)) { ret = -ENODEV; goto err; } /* increase I2C driver usage count */ if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); ret = -ENODEV; goto err; } state->i2c_client[num] = client; return 0; err: dev_dbg(&d->udev->dev, "%s: failed=%d\n", __func__, ret); return ret; } static void anysee_del_i2c_dev(struct dvb_usb_device *d) { int num; struct anysee_state *state = d_to_priv(d); struct i2c_client *client; /* find last used client */ num = ANYSEE_I2C_CLIENT_MAX; while (num--) { if (state->i2c_client[num] != NULL) break; } dev_dbg(&d->udev->dev, "%s: num=%d\n", __func__, num); if (num == -1) { dev_err(&d->udev->dev, "%s: I2C client out of index\n", KBUILD_MODNAME); goto err; } client = state->i2c_client[num]; /* decrease I2C driver usage count */ module_put(client->dev.driver->owner); /* unregister I2C device */ i2c_unregister_device(client); state->i2c_client[num] = NULL; err: dev_dbg(&d->udev->dev, "%s: failed\n", __func__); } static int anysee_frontend_attach(struct dvb_usb_adapter *adap) { struct anysee_state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); int ret = 0; u8 tmp; struct i2c_msg msg[2] = { { .addr = 0x60, .flags = 0, .len = 1, .buf = "\x00", }, { .addr = 0x60, .flags = I2C_M_RD, .len = 1, .buf = &tmp, } }; switch (state->hw) { case ANYSEE_HW_507T: /* 2 */ /* E30 */ /* attach demod */ adap->fe[0] = dvb_attach(mt352_attach, &anysee_mt352_config, &d->i2c_adap); if (adap->fe[0]) break; /* attach demod */ adap->fe[0] = dvb_attach(zl10353_attach, &anysee_zl10353_config, &d->i2c_adap); break; case ANYSEE_HW_507CD: /* 6 */ /* E30 Plus */ /* enable DVB-T demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 0), 0x01); if (ret) goto error; /* enable transport stream on IOA[7] */ ret = anysee_wr_reg_mask(d, REG_IOA, (0 << 7), 0x80); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(zl10353_attach, &anysee_zl10353_config, &d->i2c_adap); break; case ANYSEE_HW_507DC: /* 10 */ /* E30 C Plus */ /* enable DVB-C demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 0), 0x01); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(tda10023_attach, &anysee_tda10023_config, &d->i2c_adap, 0x48); break; case ANYSEE_HW_507SI: /* 11 */ /* E30 S2 Plus */ /* enable DVB-S/S2 demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 0), 0x01); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(cx24116_attach, &anysee_cx24116_config, &d->i2c_adap); break; case ANYSEE_HW_507FA: /* 15 */ /* E30 Combo Plus */ /* E30 C Plus */ /* enable tuner on IOE[4] */ ret = anysee_wr_reg_mask(d, REG_IOE, (1 << 4), 0x10); if (ret) goto error; /* probe TDA18212 */ tmp = 0; ret = i2c_transfer(&d->i2c_adap, msg, 2); if (ret == 2 && tmp == 0xc7) { 
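/* the probe read back the ID value this driver expects from a TDA18212 silicon tuner (0xc7), so note its presence for later use in anysee_tuner_attach() */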
dev_dbg(&d->udev->dev, "%s: TDA18212 found\n", __func__); state->has_tda18212 = true; } else tmp = 0; /* disable tuner on IOE[4] */ ret = anysee_wr_reg_mask(d, REG_IOE, (0 << 4), 0x10); if (ret) goto error; /* disable DVB-T demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 0), 0x01); if (ret) goto error; /* enable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 5), 0x20); if (ret) goto error; /* attach demod */ if (tmp == 0xc7) { /* TDA18212 config */ adap->fe[0] = dvb_attach(tda10023_attach, &anysee_tda10023_tda18212_config, &d->i2c_adap, 0x48); /* I2C gate for DNOD44CDH086A(TDA18212) tuner module */ if (adap->fe[0]) adap->fe[0]->ops.i2c_gate_ctrl = anysee_i2c_gate_ctrl; } else { /* PLL config */ adap->fe[0] = dvb_attach(tda10023_attach, &anysee_tda10023_config, &d->i2c_adap, 0x48); } /* break out if first frontend attaching fails */ if (!adap->fe[0]) break; /* disable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 5), 0x20); if (ret) goto error; /* enable DVB-T demod on IOD[0] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 0), 0x01); if (ret) goto error; /* attach demod */ if (tmp == 0xc7) { /* TDA18212 config */ adap->fe[1] = dvb_attach(zl10353_attach, &anysee_zl10353_tda18212_config2, &d->i2c_adap); /* I2C gate for DNOD44CDH086A(TDA18212) tuner module */ if (adap->fe[1]) adap->fe[1]->ops.i2c_gate_ctrl = anysee_i2c_gate_ctrl; } else { /* PLL config */ adap->fe[1] = dvb_attach(zl10353_attach, &anysee_zl10353_config, &d->i2c_adap); } break; case ANYSEE_HW_508TC: /* 18 */ case ANYSEE_HW_508PTC: /* 21 */ /* E7 TC */ /* E7 PTC */ /* disable DVB-T demod on IOD[6] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 6), 0x40); if (ret) goto error; /* enable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 5), 0x20); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(tda10023_attach, &anysee_tda10023_tda18212_config, &d->i2c_adap, 0x48); /* I2C gate for DNOD44CDH086A(TDA18212) tuner module */ if (adap->fe[0]) adap->fe[0]->ops.i2c_gate_ctrl = anysee_i2c_gate_ctrl; /* break out if first frontend attaching fails */ if (!adap->fe[0]) break; /* disable DVB-C demod on IOD[5] */ ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 5), 0x20); if (ret) goto error; /* enable DVB-T demod on IOD[6] */ ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 6), 0x40); if (ret) goto error; /* attach demod */ adap->fe[1] = dvb_attach(zl10353_attach, &anysee_zl10353_tda18212_config, &d->i2c_adap); /* I2C gate for DNOD44CDH086A(TDA18212) tuner module */ if (adap->fe[1]) adap->fe[1]->ops.i2c_gate_ctrl = anysee_i2c_gate_ctrl; state->has_ci = true; break; case ANYSEE_HW_508S2: /* 19 */ case ANYSEE_HW_508PS2: /* 22 */ /* E7 S2 */ /* E7 PS2 */ /* enable DVB-S/S2 demod on IOE[5] */ ret = anysee_wr_reg_mask(d, REG_IOE, (1 << 5), 0x20); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(stv0900_attach, &anysee_stv0900_config, &d->i2c_adap, 0); state->has_ci = true; break; case ANYSEE_HW_508T2C: /* 20 */ /* E7 T2C */ /* enable DVB-T/T2/C demod on IOE[5] */ ret = anysee_wr_reg_mask(d, REG_IOE, (1 << 5), 0x20); if (ret) goto error; /* attach demod */ adap->fe[0] = dvb_attach(cxd2820r_attach, &anysee_cxd2820r_config, &d->i2c_adap, NULL); state->has_ci = true; break; } if (!adap->fe[0]) { /* we have no frontend :-( */ ret = -ENODEV; dev_err(&d->udev->dev, "%s: Unsupported Anysee version. 
Please report to <linux-media@vger.kernel.org>.\n", KBUILD_MODNAME); } error: return ret; } static int anysee_tuner_attach(struct dvb_usb_adapter *adap) { struct anysee_state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct dvb_frontend *fe; int ret; dev_dbg(&d->udev->dev, "%s:\n", __func__); switch (state->hw) { case ANYSEE_HW_507T: /* 2 */ /* E30 */ /* attach tuner */ fe = dvb_attach(dvb_pll_attach, adap->fe[0], (0xc2 >> 1), NULL, DVB_PLL_THOMSON_DTT7579); break; case ANYSEE_HW_507CD: /* 6 */ /* E30 Plus */ /* attach tuner */ fe = dvb_attach(dvb_pll_attach, adap->fe[0], (0xc2 >> 1), &d->i2c_adap, DVB_PLL_THOMSON_DTT7579); break; case ANYSEE_HW_507DC: /* 10 */ /* E30 C Plus */ /* attach tuner */ fe = dvb_attach(dvb_pll_attach, adap->fe[0], (0xc0 >> 1), &d->i2c_adap, DVB_PLL_SAMSUNG_DTOS403IH102A); break; case ANYSEE_HW_507SI: /* 11 */ /* E30 S2 Plus */ /* attach LNB controller */ fe = dvb_attach(isl6423_attach, adap->fe[0], &d->i2c_adap, &anysee_isl6423_config); break; case ANYSEE_HW_507FA: /* 15 */ /* E30 Combo Plus */ /* E30 C Plus */ /* Try first attach TDA18212 silicon tuner on IOE[4], if that * fails attach old simple PLL. */ /* attach tuner */ if (state->has_tda18212) { struct tda18212_config tda18212_config = anysee_tda18212_config; tda18212_config.fe = adap->fe[0]; ret = anysee_add_i2c_dev(d, "tda18212", 0x60, &tda18212_config); if (ret) goto err; /* copy tuner ops for 2nd FE as tuner is shared */ if (adap->fe[1]) { adap->fe[1]->tuner_priv = adap->fe[0]->tuner_priv; memcpy(&adap->fe[1]->ops.tuner_ops, &adap->fe[0]->ops.tuner_ops, sizeof(struct dvb_tuner_ops)); } return 0; } else { /* attach tuner */ fe = dvb_attach(dvb_pll_attach, adap->fe[0], (0xc0 >> 1), &d->i2c_adap, DVB_PLL_SAMSUNG_DTOS403IH102A); if (fe && adap->fe[1]) { /* attach tuner for 2nd FE */ fe = dvb_attach(dvb_pll_attach, adap->fe[1], (0xc0 >> 1), &d->i2c_adap, DVB_PLL_SAMSUNG_DTOS403IH102A); } } break; case ANYSEE_HW_508TC: /* 18 */ case ANYSEE_HW_508PTC: /* 21 */ { /* E7 TC */ /* E7 PTC */ struct tda18212_config tda18212_config = anysee_tda18212_config; tda18212_config.fe = adap->fe[0]; ret = anysee_add_i2c_dev(d, "tda18212", 0x60, &tda18212_config); if (ret) goto err; /* copy tuner ops for 2nd FE as tuner is shared */ if (adap->fe[1]) { adap->fe[1]->tuner_priv = adap->fe[0]->tuner_priv; memcpy(&adap->fe[1]->ops.tuner_ops, &adap->fe[0]->ops.tuner_ops, sizeof(struct dvb_tuner_ops)); } return 0; } case ANYSEE_HW_508S2: /* 19 */ case ANYSEE_HW_508PS2: /* 22 */ /* E7 S2 */ /* E7 PS2 */ /* attach tuner */ fe = dvb_attach(stv6110_attach, adap->fe[0], &anysee_stv6110_config, &d->i2c_adap); if (fe) { /* attach LNB controller */ fe = dvb_attach(isl6423_attach, adap->fe[0], &d->i2c_adap, &anysee_isl6423_config); } break; case ANYSEE_HW_508T2C: /* 20 */ { /* E7 T2C */ struct tda18212_config tda18212_config = anysee_tda18212_config2; tda18212_config.fe = adap->fe[0]; ret = anysee_add_i2c_dev(d, "tda18212", 0x60, &tda18212_config); if (ret) goto err; return 0; } default: fe = NULL; } if (fe) ret = 0; else ret = -ENODEV; err: return ret; } #if IS_ENABLED(CONFIG_RC_CORE) static int anysee_rc_query(struct dvb_usb_device *d) { u8 buf[] = {CMD_GET_IR_CODE}; u8 ircode[2]; int ret; /* Remote controller is basic NEC using address byte 0x08. Anysee device RC query returns only two bytes, status and code, address byte is dropped. Also it does not return any value for NEC RCs having address byte other than 0x08. Due to that, we cannot use that device as standard NEC receiver. 
It could be possible make hack which reads whole code directly from device memory... */ ret = anysee_ctrl_msg(d, buf, sizeof(buf), ircode, sizeof(ircode)); if (ret) return ret; if (ircode[0]) { dev_dbg(&d->udev->dev, "%s: key pressed %02x\n", __func__, ircode[1]); rc_keydown(d->rc_dev, RC_PROTO_NEC, RC_SCANCODE_NEC(0x08, ircode[1]), 0); } return 0; } static int anysee_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) { rc->allowed_protos = RC_PROTO_BIT_NEC; rc->query = anysee_rc_query; rc->interval = 250; /* windows driver uses 500ms */ return 0; } #else #define anysee_get_rc_config NULL #endif static int anysee_ci_read_attribute_mem(struct dvb_ca_en50221 *ci, int slot, int addr) { struct dvb_usb_device *d = ci->data; int ret; u8 buf[] = {CMD_CI, 0x02, 0x40 | addr >> 8, addr & 0xff, 0x00, 1}; u8 val; ret = anysee_ctrl_msg(d, buf, sizeof(buf), &val, 1); if (ret) return ret; return val; } static int anysee_ci_write_attribute_mem(struct dvb_ca_en50221 *ci, int slot, int addr, u8 val) { struct dvb_usb_device *d = ci->data; u8 buf[] = {CMD_CI, 0x03, 0x40 | addr >> 8, addr & 0xff, 0x00, 1, val}; return anysee_ctrl_msg(d, buf, sizeof(buf), NULL, 0); } static int anysee_ci_read_cam_control(struct dvb_ca_en50221 *ci, int slot, u8 addr) { struct dvb_usb_device *d = ci->data; int ret; u8 buf[] = {CMD_CI, 0x04, 0x40, addr, 0x00, 1}; u8 val; ret = anysee_ctrl_msg(d, buf, sizeof(buf), &val, 1); if (ret) return ret; return val; } static int anysee_ci_write_cam_control(struct dvb_ca_en50221 *ci, int slot, u8 addr, u8 val) { struct dvb_usb_device *d = ci->data; u8 buf[] = {CMD_CI, 0x05, 0x40, addr, 0x00, 1, val}; return anysee_ctrl_msg(d, buf, sizeof(buf), NULL, 0); } static int anysee_ci_slot_reset(struct dvb_ca_en50221 *ci, int slot) { struct dvb_usb_device *d = ci->data; int ret; struct anysee_state *state = d_to_priv(d); state->ci_cam_ready = jiffies + msecs_to_jiffies(1000); ret = anysee_wr_reg_mask(d, REG_IOA, (0 << 7), 0x80); if (ret) return ret; msleep(300); ret = anysee_wr_reg_mask(d, REG_IOA, (1 << 7), 0x80); if (ret) return ret; return 0; } static int anysee_ci_slot_shutdown(struct dvb_ca_en50221 *ci, int slot) { struct dvb_usb_device *d = ci->data; int ret; ret = anysee_wr_reg_mask(d, REG_IOA, (0 << 7), 0x80); if (ret) return ret; msleep(30); ret = anysee_wr_reg_mask(d, REG_IOA, (1 << 7), 0x80); if (ret) return ret; return 0; } static int anysee_ci_slot_ts_enable(struct dvb_ca_en50221 *ci, int slot) { struct dvb_usb_device *d = ci->data; return anysee_wr_reg_mask(d, REG_IOD, (0 << 1), 0x02); } static int anysee_ci_poll_slot_status(struct dvb_ca_en50221 *ci, int slot, int open) { struct dvb_usb_device *d = ci->data; struct anysee_state *state = d_to_priv(d); int ret; u8 tmp = 0; ret = anysee_rd_reg_mask(d, REG_IOC, &tmp, 0x40); if (ret) return ret; if (tmp == 0) { ret = DVB_CA_EN50221_POLL_CAM_PRESENT; if (time_after(jiffies, state->ci_cam_ready)) ret |= DVB_CA_EN50221_POLL_CAM_READY; } return ret; } static int anysee_ci_init(struct dvb_usb_device *d) { struct anysee_state *state = d_to_priv(d); int ret; state->ci.owner = THIS_MODULE; state->ci.read_attribute_mem = anysee_ci_read_attribute_mem; state->ci.write_attribute_mem = anysee_ci_write_attribute_mem; state->ci.read_cam_control = anysee_ci_read_cam_control; state->ci.write_cam_control = anysee_ci_write_cam_control; state->ci.slot_reset = anysee_ci_slot_reset; state->ci.slot_shutdown = anysee_ci_slot_shutdown; state->ci.slot_ts_enable = anysee_ci_slot_ts_enable; state->ci.poll_slot_status = anysee_ci_poll_slot_status; 
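/* the en50221 callbacks are wired up above; what follows stores the back-pointer they use (ci.data), toggles the CI control bits on REG_IOA/REG_IOD and registers the slot with dvb_ca_en50221_init() */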
state->ci.data = d; ret = anysee_wr_reg_mask(d, REG_IOA, (1 << 7), 0x80); if (ret) return ret; ret = anysee_wr_reg_mask(d, REG_IOD, (0 << 2)|(0 << 1)|(0 << 0), 0x07); if (ret) return ret; ret = anysee_wr_reg_mask(d, REG_IOD, (1 << 2)|(1 << 1)|(1 << 0), 0x07); if (ret) return ret; ret = dvb_ca_en50221_init(&d->adapter[0].dvb_adap, &state->ci, 0, 1); if (ret) return ret; state->ci_attached = true; return 0; } static void anysee_ci_release(struct dvb_usb_device *d) { struct anysee_state *state = d_to_priv(d); /* detach CI */ if (state->ci_attached) dvb_ca_en50221_release(&state->ci); return; } static int anysee_init(struct dvb_usb_device *d) { struct anysee_state *state = d_to_priv(d); int ret; /* There is one interface with two alternate settings. Alternate setting 0 is for bulk transfer. Alternate setting 1 is for isochronous transfer. We use bulk transfer (alternate setting 0). */ ret = usb_set_interface(d->udev, 0, 0); if (ret) return ret; /* LED light */ ret = anysee_led_ctrl(d, 0x01, 0x03); if (ret) return ret; /* enable IR */ ret = anysee_ir_ctrl(d, 1); if (ret) return ret; /* attach CI */ if (state->has_ci) { ret = anysee_ci_init(d); if (ret) return ret; } return 0; } static void anysee_exit(struct dvb_usb_device *d) { struct anysee_state *state = d_to_priv(d); if (state->i2c_client[0]) anysee_del_i2c_dev(d); return anysee_ci_release(d); } /* DVB USB Driver stuff */ static struct dvb_usb_device_properties anysee_props = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct anysee_state), .generic_bulk_ctrl_endpoint = 0x01, .generic_bulk_ctrl_endpoint_response = 0x81, .i2c_algo = &anysee_i2c_algo, .read_config = anysee_read_config, .frontend_attach = anysee_frontend_attach, .tuner_attach = anysee_tuner_attach, .init = anysee_init, .get_rc_config = anysee_get_rc_config, .frontend_ctrl = anysee_frontend_ctrl, .streaming_ctrl = anysee_streaming_ctrl, .exit = anysee_exit, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_BULK(0x82, 8, 16 * 512), } } }; static const struct usb_device_id anysee_id_table[] = { { DVB_USB_DEVICE(USB_VID_CYPRESS, USB_PID_ANYSEE, &anysee_props, "Anysee", RC_MAP_ANYSEE) }, { DVB_USB_DEVICE(USB_VID_AMT, USB_PID_ANYSEE, &anysee_props, "Anysee", RC_MAP_ANYSEE) }, { } }; MODULE_DEVICE_TABLE(usb, anysee_id_table); static struct usb_driver anysee_usb_driver = { .name = KBUILD_MODNAME, .id_table = anysee_id_table, .probe = dvb_usbv2_probe, .disconnect = dvb_usbv2_disconnect, .suspend = dvb_usbv2_suspend, .resume = dvb_usbv2_resume, .reset_resume = dvb_usbv2_reset_resume, .no_dynamic_id = 1, .soft_unbind = 1, }; module_usb_driver(anysee_usb_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Driver Anysee E30 DVB-C & DVB-T USB2.0"); MODULE_LICENSE("GPL"); |
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H #include <linux/mem_encrypt.h> #include <asm/page.h> #include <asm/pgtable_types.h> /* * Macro to mark a page protection value as UC- */ #define pgprot_noncached(prot) \ ((boot_cpu_data.x86 > 3) \ ? (__pgprot(pgprot_val(prot) | \ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) #ifndef __ASSEMBLY__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm/coco.h> #include <asm-generic/pgtable_uffd.h> #include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); bool ptdump_walk_pgd_level_checkwx(void); #define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); /* * Macros to add or remove encryption attribute */ #define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) #define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) #ifdef CONFIG_DEBUG_WX #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else #define debug_checkwx_user() do { } while (0) #endif /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; #define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) #define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) #define pgd_clear(pgd) (pgtable_l5_enabled() ? 
native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d # define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d) #endif #ifndef __PAGETABLE_PUD_FOLDED #define p4d_clear(p4d) native_p4d_clear(p4d) #endif #ifndef set_pud # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) #ifndef __PAGETABLE_P4D_FOLDED #define p4d_val(x) native_p4d_val(x) #define __p4d(x) native_make_p4d(x) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_val(x) native_pud_val(x) #define __pud(x) native_make_pud(x) #endif #ifndef __PAGETABLE_PMD_FOLDED #define pmd_val(x) native_pmd_val(x) #define __pmd(x) native_make_pmd(x) #endif #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v & ~clear); } static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); return native_make_pud(v & ~clear); } /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ static inline bool pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY_BITS; } static inline bool pte_shstk(pte_t pte) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; } static inline bool pte_decrypted(pte_t pte) { return cc_mkdec(pte_val(pte)) == pte_val(pte); } #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_DIRTY_BITS; } static inline bool pmd_shstk(pmd_t pmd) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == (_PAGE_DIRTY | _PAGE_PSE); } #define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } static inline bool pud_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) { return pud_flags(pud) & _PAGE_ACCESSED; } static inline bool pud_shstk(pud_t pud) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == (_PAGE_DIRTY | _PAGE_PSE); } static inline int pte_write(pte_t pte) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. */ return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); } #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. 
*/ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); } #define pud_write pud_write static inline int pud_write(pud_t pud) { return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) { return pte_flags(pte) & _PAGE_SPECIAL; } /* Entries that were set to PROT_NONE are inverted */ static inline u64 protnone_mask(u64 val); #define PFN_PTE_SHIFT PAGE_SHIFT static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } #define pud_pfn pud_pfn static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) { return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; } static inline unsigned long pgd_pfn(pgd_t pgd) { return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } #define p4d_leaf p4d_leaf static inline bool p4d_leaf(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #endif #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return boot_cpu_has(X86_FEATURE_PSE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_devmap(pud_t pud) { return !!(pud_val(pud) & _PAGE_DEVMAP); } #else static inline int pud_devmap(pud_t pud) { return 0; } #endif #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP static inline bool pmd_special(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SPECIAL; } static inline pmd_t pmd_mkspecial(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SPECIAL); } #endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP static inline bool pud_special(pud_t pud) { return pud_flags(pud) & _PAGE_SPECIAL; } static inline pud_t pud_mkspecial(pud_t pud) { return pud_set_flags(pud, _PAGE_SPECIAL); } #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) { pteval_t v = native_pte_val(pte); return native_make_pte(v | set); } static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) { pteval_t v = native_pte_val(pte); return native_make_pte(v & ~clear); } /* * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. 
So * when creating dirty, write-protected memory, a software bit is used: * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the * Dirty bit to SavedDirty, and vice-vesra. * * This shifting is only done if needed. In the case of shifting * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of * shifting SavedDirty->Dirty, the condition is Write=1. */ static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) { pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; v &= ~(cond << _PAGE_BIT_DIRTY); return v; } static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) { pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); return v; } static inline pte_t pte_mksaveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = mksaveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_clear_saveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = clear_saveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_wrprotect(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PTE (Write=0,Dirty=1). Move the hardware * dirty value to the software bit, if present. */ return pte_mksaveddirty(pte); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_UFFD_WP; } static inline pte_t pte_mkuffd_wp(pte_t pte) { return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); } static inline pte_t pte_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY_BITS); } static inline pte_t pte_mkold(pte_t pte) { return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pte_mksaveddirty(pte); } static inline pte_t pte_mkwrite_shstk(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } struct vm_area_struct; pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); #define pte_mkwrite pte_mkwrite static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL); } static inline pte_t pte_mkdevmap(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); } /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = mksaveddirty_shift(v); return native_make_pmd(v); } /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = clear_saveddirty_shift(v); return native_make_pmd(v); } static inline pmd_t 
pmd_wrprotect(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PMD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. */ return pmd_mksaveddirty(pmd); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_UFFD_WP; } static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkclean(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pmd_mksaveddirty(pmd); } static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkdevmap(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DEVMAP); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); #define pmd_mkwrite pmd_mkwrite /* See comments above mksaveddirty_shift() */ static inline pud_t pud_mksaveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = mksaveddirty_shift(v); return native_make_pud(v); } /* See comments above mksaveddirty_shift() */ static inline pud_t pud_clear_saveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = clear_saveddirty_shift(v); return native_make_pud(v); } static inline pud_t pud_mkold(pud_t pud) { return pud_clear_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkclean(pud_t pud) { return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { pud = pud_clear_flags(pud, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PUD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. 
*/ return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pud_mksaveddirty(pud); } static inline pud_t pud_mkdevmap(pud_t pud) { return pud_set_flags(pud, _PAGE_DEVMAP); } static inline pud_t pud_mkhuge(pud_t pud) { return pud_set_flags(pud, _PAGE_PSE); } static inline pud_t pud_mkyoung(pud_t pud) { return pud_set_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkwrite(pud_t pud) { pud = pud_set_flags(pud, _PAGE_RW); return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SOFT_DIRTY; } static inline int pmd_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; } static inline int pud_soft_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_SOFT_DIRTY; } static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_mksoft_dirty(pud_t pud) { return pud_set_flags(pud, _PAGE_SOFT_DIRTY); } static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_clear_soft_dirty(pud_t pud) { return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. */ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) { pgprotval_t protval = pgprot_val(pgprot); if (protval & _PAGE_PRESENT) protval &= __supported_pte_mask; return protval; } static inline pgprotval_t check_pgprot(pgprot_t pgprot) { pgprotval_t massaged_val = massage_pgprot(pgprot); /* mmdebug.h can not be included here because of dependencies */ #ifdef CONFIG_DEBUG_VM WARN_ONCE(pgprot_val(pgprot) != massaged_val, "attempted to set unsupported pgprot: %016llx " "bits: %016llx supported: %016llx\n", (u64)pgprot_val(pgprot), (u64)pgprot_val(pgprot) ^ massaged_val, (u64)__supported_pte_mask); #endif return massaged_val; } static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | check_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PUD_PAGE_MASK; return __pud(pfn | check_pgprot(pgprot)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline pud_t pud_mkinvalid(pud_t pud) { return pfn_pud(pud_pfn(pud), __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present): */ val &= 
_PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); pte_result = __pte(val); /* * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: * 1. Marking Write=0 PTEs Dirty=1 * 2. Marking Dirty=1 PTEs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs. */ if (oldval & _PAGE_RW) pte_result = pte_mksaveddirty(pte_result); else pte_result = pte_clear_saveddirty(pte_result); return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; pmd_t pmd_result; val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); pmd_result = __pmd(val); /* * Avoid creating shadow stack PMD by accident. See comment in * pte_modify(). */ if (oldval & _PAGE_RW) pmd_result = pmd_mksaveddirty(pmd_result); else pmd_result = pmd_clear_saveddirty(pmd_result); return pmd_result; } static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) { pudval_t val = pud_val(pud), oldval = val; pud_t pud_result; val &= _HPAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK); pud_result = __pud(val); /* * Avoid creating shadow stack PUD by accident. See comment in * pte_modify(). */ if (oldval & _PAGE_RW) pud_result = pud_mksaveddirty(pud_result); else pud_result = pud_clear_saveddirty(pud_result); return pud_result; } /* * mprotect needs to preserve PAT and encryption bits when updating * vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } #define pte_pgprot(x) __pgprot(pte_flags(x)) #define pmd_pgprot(x) __pgprot(pmd_flags(x)) #define pud_pgprot(x) __pgprot(pud_flags(x)) #define p4d_pgprot(x) __pgprot(p4d_flags(x)) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) { /* * PAT type is always WB for untracked ranges, so no need to check. */ if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) return 1; /* * Certain new memtypes are not allowed with certain * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back * - request is write-through, return cannot be write-back * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } return 1; } pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* * Take a PGD location (pgdp) and a pgd value that needs to be set there. 
* Populates the user and returns the resulting PGD that must be set in * the kernel copy of the page tables. */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { if (!static_cpu_has(X86_FEATURE_PTI)) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } #else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else # include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> #include <asm/fixmap.h> static inline int pte_none(pte_t pte) { return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte == b.pte; } static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { if (__pte_needs_invert(pte_val(pte))) return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #define pte_advance_pfn pte_advance_pfn static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; } #endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { if (pte_flags(a) & _PAGE_PRESENT) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && atomic_read(&mm->tlb_flush_pending)) return true; return false; } static inline int pmd_present(pmd_t pmd) { /* * Checking for _PAGE_PSE is needed too because * split_huge_page will temporarily clear the present bit (but * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). */ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } static inline int pmd_protnone(pmd_t pmd) { return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } #endif /* CONFIG_NUMA_BALANCING */ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ unsigned long val = native_pmd_val(pmd); return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
* * (Currently stuck as a macro because of indirect forward reference * to linux/mm.h:page_to_nid()) */ #define mk_pte(page, pgprot) \ ({ \ pgprot_t __pgprot = pgprot; \ \ WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \ _PAGE_DIRTY); \ pfn_pte(page_to_pfn(page), __pgprot); \ }) static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) { return npg >> (20 - PAGE_SHIFT); } #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) { return pud_flags(pud) & _PAGE_PRESENT; } static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) #define pud_leaf pud_leaf static inline bool pud_leaf(pud_t pud) { return pud_val(pud) & _PAGE_PSE; } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int p4d_present(p4d_t p4d) { return p4d_flags(p4d) & _PAGE_PRESENT; } static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ static inline unsigned long p4d_index(unsigned long address) { return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1); } #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { if (!pgtable_l5_enabled()) return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; if (!pgtable_l5_enabled()) return 0; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) { if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables * except on 32-bit non-PAE which is not supported on * KNL. 
*/ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* __ASSEMBLY__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) #ifndef __ASSEMBLY__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 extern pgd_t trampoline_pgd_entry; #endif /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { pte_t res = *ptep; /* Pure native function needs no input for mm, addr */ native_pte_clear(NULL, 0, ptep); return res; } static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) { pmd_t res = *pmdp; native_pmd_clear(pmdp); return res; } static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) { pud_t res = *pudp; native_pud_clear(pudp); return res; } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(mm, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time. */ struct vm_area_struct; #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH extern int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); return pte; } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; if (full) { /* * Full address destruction in progress; paravirt does not * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. 
*/ pte_t old_pte, new_pte; old_pte = READ_ONCE(*ptep); do { new_pte = pte_wrprotect(old_pte); } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); extern int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty); #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); extern int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp); #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { pmd_t pmd = native_pmdp_get_and_clear(pmdp); page_table_check_pmd_clear(mm, pmd); return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { pud_t pud = native_pudp_get_and_clear(pudp); page_table_check_pud_clear(mm, pud); return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. */ pmd_t old_pmd, new_pmd; old_pmd = READ_ONCE(*pmdp); do { new_pmd = pmd_wrprotect(old_pmd); } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish #define pmdp_establish pmdp_establish static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; WRITE_ONCE(*pmdp, pmd); return old; } } #endif #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline pud_t pudp_establish(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t pud) { page_table_check_pud_set(vma->vm_mm, pudp, pud); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pudp, pud); } else { pud_t old = *pudp; WRITE_ONCE(*pudp, pud); return old; } } #endif #define __HAVE_ARCH_PMDP_INVALIDATE_AD extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, pud_t *pudp); /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. * * Returns true for parts of the PGD that map userspace and * false for the parts that map the kernel. */ static inline bool pgdp_maps_userspace(void *__ptr) { unsigned long ptr = (unsigned long)__ptr; return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } #define pgd_leaf pgd_leaf static inline bool pgd_leaf(pgd_t pgd) { return false; } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. 
To switch between them, you * just need to flip the 12th bit in their addresses. */ #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT /* * This generates better code than the inline assembly in * __set_bit(). */ static inline void *ptr_set_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr |= BIT(bit); return (void *)__ptr; } static inline void *ptr_clear_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr &= ~BIT(bit); return (void *)__ptr; } static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) { return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) { return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) { return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. * * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); #endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) static inline int page_level_shift(enum pg_level level) { return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; } static inline unsigned long page_level_size(enum pg_level level) { return 1UL << page_level_shift(level); } static inline unsigned long page_level_mask(enum pg_level level) { return ~(page_level_size(level) - 1); } /* * The x86 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. 
*/ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } static inline void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } static inline pte_t pte_swp_mkexclusive(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } static inline int pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } static inline pte_t pte_swp_clear_exclusive(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); } static inline int pte_swp_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; } static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } #endif #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); } static inline int pte_swp_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_UFFD_WP; } static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } static inline int pmd_swp_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; } static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* ifdef to avoid doing 59-bit shift on 32-bit values */ return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; #else return 0; #endif } static inline bool __pkru_allows_pkey(u16 pkey, bool write) { u32 pkru = read_pkru(); if (!__pkru_allows_read(pkru, pkey)) return false; if (write && !__pkru_allows_write(pkru, pkey)) return false; return true; } /* * 'pteval' can come from a PTE, PMD or PUD. We only check * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the * same value on all 3 types. */ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; /* * Write=0,Dirty=1 PTEs are shadow stack, which the kernel * shouldn't generally allow access to, but since they * are already Write=0, the below logic covers both cases. 
*/ if (write) need_pte_bits |= _PAGE_RW; if ((pteval & need_pte_bits) != need_pte_bits) return 0; return __pkru_allows_pkey(pte_flags_pkey(pteval), write); } #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { return __pte_access_permitted(pte_val(pte), write); } #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { return __pte_access_permitted(pmd_val(pmd), write); } #define pud_access_permitted pud_access_permitted static inline bool pud_access_permitted(pud_t pud, bool write) { return __pte_access_permitted(pud_val(pud), write); } #define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); static inline bool arch_has_pfn_modify_check(void) { return boot_cpu_has_bug(X86_BUG_L1TF); } #define arch_check_zapped_pte arch_check_zapped_pte void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); #define arch_check_zapped_pmd arch_check_zapped_pmd void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); #define arch_check_zapped_pud arch_check_zapped_pud void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud); #ifdef CONFIG_XEN_PV #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young static inline bool arch_has_hw_nonleaf_pmd_young(void) { return !cpu_feature_enabled(X86_FEATURE_XENPV); } #endif #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); } static inline bool pmd_user_accessible_page(pmd_t pmd) { return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); } static inline bool pud_user_accessible_page(pud_t pud) { return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); } #endif #ifdef CONFIG_X86_SGX int arch_memory_failure(unsigned long pfn, int flags); #define arch_memory_failure arch_memory_failure bool arch_is_platform_page(u64 paddr); #define arch_is_platform_page arch_is_platform_page #endif /* * Use set_p*_safe(), and elide TLB flushing, when confident that *no* * TLB flush will be required as a result of the "set". For example, use * in scenarios where it is known ahead of time that the routine is * setting non-present entries, or re-setting an existing entry to the * same value. Otherwise, use the typical "set" helpers and flush the * TLB. */ #define set_pte_safe(ptep, pte) \ ({ \ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ set_pte(ptep, pte); \ }) #define set_pmd_safe(pmdp, pmd) \ ({ \ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ set_pmd(pmdp, pmd); \ }) #define set_pud_safe(pudp, pud) \ ({ \ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ set_pud(pudp, pud); \ }) #define set_p4d_safe(p4dp, p4d) \ ({ \ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ set_p4d(p4dp, p4d); \ }) #define set_pgd_safe(pgdp, pgd) \ ({ \ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ set_pgd(pgdp, pgd); \ }) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ |
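To make the branchless Dirty/SavedDirty move above easier to follow, here is a minimal user-space sketch of the same transformation. The bit positions are illustrative placeholders, not the real x86 _PAGE_BIT_* values; only the conditional-move logic is taken from mksaveddirty_shift().

/*
 * Sketch of the SavedDirty transition: when an entry is Write=0, the
 * hardware Dirty bit is moved into a software SavedDirty bit so that a
 * shadow-stack-looking (Write=0,Dirty=1) entry is never created.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_BIT_RW		1	/* placeholder for _PAGE_BIT_RW */
#define DEMO_BIT_DIRTY		6	/* placeholder for _PAGE_BIT_DIRTY */
#define DEMO_BIT_SAVED_DIRTY	10	/* placeholder for _PAGE_BIT_SAVED_DIRTY */

static uint64_t demo_mksaveddirty(uint64_t v)
{
	/* cond is 1 only when the entry is Write=0 */
	uint64_t cond = (~v >> DEMO_BIT_RW) & 1;

	/* Copy Dirty into SavedDirty and clear Dirty, but only if cond is set */
	v |= ((v >> DEMO_BIT_DIRTY) & cond) << DEMO_BIT_SAVED_DIRTY;
	v &= ~(cond << DEMO_BIT_DIRTY);
	return v;
}

int main(void)
{
	uint64_t wp_dirty = 1ULL << DEMO_BIT_DIRTY;		/* Write=0, Dirty=1 */
	uint64_t rw_dirty = wp_dirty | (1ULL << DEMO_BIT_RW);	/* Write=1, Dirty=1 */

	/* Dirty moves to SavedDirty only for the write-protected entry */
	printf("%#llx -> %#llx\n", (unsigned long long)wp_dirty,
	       (unsigned long long)demo_mksaveddirty(wp_dirty));
	printf("%#llx -> %#llx\n", (unsigned long long)rw_dirty,
	       (unsigned long long)demo_mksaveddirty(rw_dirty));
	return 0;
}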
// SPDX-License-Identifier: GPL-2.0+
/*
 * hwmon driver for Gigabyte AORUS Waterforce AIO CPU coolers: X240, X280 and X360.
 *
 * Copyright 2023 Aleksa Savic <savicaleksa83@gmail.com>
 */

#include <linux/debugfs.h>
#include <linux/hid.h>
#include <linux/hwmon.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/unaligned.h>

#define DRIVER_NAME	"gigabyte_waterforce"

#define USB_VENDOR_ID_GIGABYTE		0x1044
#define USB_PRODUCT_ID_WATERFORCE	0x7a4d	/* Gigabyte AORUS WATERFORCE X240, X280 and X360 */

#define STATUS_VALIDITY		(2 * 1000)	/* ms */
#define MAX_REPORT_LENGTH	6144

#define WATERFORCE_TEMP_SENSOR	0xD
#define WATERFORCE_FAN_SPEED	0x02
#define WATERFORCE_PUMP_SPEED	0x05
#define WATERFORCE_FAN_DUTY	0x08
#define WATERFORCE_PUMP_DUTY	0x09

/* Control commands, inner offsets and lengths */
static const u8 get_status_cmd[] = { 0x99, 0xDA };

#define FIRMWARE_VER_START_OFFSET_1	2
#define FIRMWARE_VER_START_OFFSET_2	3
static const u8 get_firmware_ver_cmd[] = { 0x99, 0xD6 };

/* Command lengths */
#define GET_STATUS_CMD_LENGTH		2
#define GET_FIRMWARE_VER_CMD_LENGTH	2

static const char *const waterforce_temp_label[] = {
	"Coolant temp"
};

static const char *const waterforce_speed_label[] = {
	"Fan speed",
	"Pump speed"
};

struct waterforce_data {
	struct hid_device *hdev;
	struct device *hwmon_dev;
	struct dentry *debugfs;
	/* For locking access to buffer */
	struct mutex buffer_lock;
	/* For queueing multiple readers */
	struct mutex status_report_request_mutex;
	/* For reinitializing the completion below */
	spinlock_t status_report_request_lock;
	struct completion status_report_received;
	struct completion fw_version_processed;

	/* Sensor data */
	s32 temp_input[1];
	u16 speed_input[2];	/* Fan and pump speed in RPM */
	u8 duty_input[2];	/* Fan and pump duty in 0-100% */

	u8 *buffer;
	int firmware_version;
	unsigned long updated;	/* jiffies */
};

static umode_t
waterforce_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { switch (type) { case hwmon_temp: switch (attr) { case hwmon_temp_label: case hwmon_temp_input: return 0444; default: break; } break; case hwmon_fan: switch (attr) { case hwmon_fan_label: case hwmon_fan_input: return 0444; default: break; } break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: return 0444; default: break; } break; default: break; } return 0; } /* Writes the command to the device with the rest of the report filled with zeroes */ static int waterforce_write_expanded(struct waterforce_data *priv, const u8 *cmd, int cmd_length) { int ret; mutex_lock(&priv->buffer_lock); memcpy_and_pad(priv->buffer, MAX_REPORT_LENGTH, cmd, cmd_length, 0x00); ret = hid_hw_output_report(priv->hdev, priv->buffer, MAX_REPORT_LENGTH); mutex_unlock(&priv->buffer_lock); return ret; } static int waterforce_get_status(struct waterforce_data *priv) { int ret = mutex_lock_interruptible(&priv->status_report_request_mutex); if (ret < 0) return ret; if (!time_after(jiffies, priv->updated + msecs_to_jiffies(STATUS_VALIDITY))) { /* Data is up to date */ goto unlock_and_return; } /* * Disable raw event parsing for a moment to safely reinitialize the * completion. Reinit is done because hidraw could have triggered * the raw event parsing and marked the priv->status_report_received * completion as done. */ spin_lock_bh(&priv->status_report_request_lock); reinit_completion(&priv->status_report_received); spin_unlock_bh(&priv->status_report_request_lock); /* Send command for getting status */ ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH); if (ret < 0) goto unlock_and_return; ret = wait_for_completion_interruptible_timeout(&priv->status_report_received, msecs_to_jiffies(STATUS_VALIDITY)); if (ret == 0) ret = -ETIMEDOUT; unlock_and_return: mutex_unlock(&priv->status_report_request_mutex); if (ret < 0) return ret; return 0; } static int waterforce_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct waterforce_data *priv = dev_get_drvdata(dev); int ret = waterforce_get_status(priv); if (ret < 0) return ret; switch (type) { case hwmon_temp: *val = priv->temp_input[channel]; break; case hwmon_fan: *val = priv->speed_input[channel]; break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: *val = DIV_ROUND_CLOSEST(priv->duty_input[channel] * 255, 100); break; default: return -EOPNOTSUPP; } break; default: return -EOPNOTSUPP; /* unreachable */ } return 0; } static int waterforce_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { switch (type) { case hwmon_temp: *str = waterforce_temp_label[channel]; break; case hwmon_fan: *str = waterforce_speed_label[channel]; break; default: return -EOPNOTSUPP; /* unreachable */ } return 0; } static int waterforce_get_fw_ver(struct hid_device *hdev) { struct waterforce_data *priv = hid_get_drvdata(hdev); int ret; ret = waterforce_write_expanded(priv, get_firmware_ver_cmd, GET_FIRMWARE_VER_CMD_LENGTH); if (ret < 0) return ret; ret = wait_for_completion_interruptible_timeout(&priv->fw_version_processed, msecs_to_jiffies(STATUS_VALIDITY)); if (ret == 0) return -ETIMEDOUT; else if (ret < 0) return ret; return 0; } static const struct hwmon_ops waterforce_hwmon_ops = { .is_visible = waterforce_is_visible, .read = waterforce_read, .read_string = waterforce_read_string }; static const struct hwmon_channel_info *waterforce_info[] = { HWMON_CHANNEL_INFO(temp, 
HWMON_T_INPUT | HWMON_T_LABEL), HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL), HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT, HWMON_PWM_INPUT), NULL }; static const struct hwmon_chip_info waterforce_chip_info = { .ops = &waterforce_hwmon_ops, .info = waterforce_info, }; static int waterforce_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct waterforce_data *priv = hid_get_drvdata(hdev); if (data[0] == get_firmware_ver_cmd[0] && data[1] == get_firmware_ver_cmd[1]) { /* Received a firmware version report */ priv->firmware_version = data[FIRMWARE_VER_START_OFFSET_1] * 10 + data[FIRMWARE_VER_START_OFFSET_2]; if (!completion_done(&priv->fw_version_processed)) complete_all(&priv->fw_version_processed); return 0; } if (data[0] != get_status_cmd[0] || data[1] != get_status_cmd[1]) return 0; priv->temp_input[0] = data[WATERFORCE_TEMP_SENSOR] * 1000; priv->speed_input[0] = get_unaligned_le16(data + WATERFORCE_FAN_SPEED); priv->speed_input[1] = get_unaligned_le16(data + WATERFORCE_PUMP_SPEED); priv->duty_input[0] = data[WATERFORCE_FAN_DUTY]; priv->duty_input[1] = data[WATERFORCE_PUMP_DUTY]; spin_lock(&priv->status_report_request_lock); if (!completion_done(&priv->status_report_received)) complete_all(&priv->status_report_received); spin_unlock(&priv->status_report_request_lock); priv->updated = jiffies; return 0; } static int firmware_version_show(struct seq_file *seqf, void *unused) { struct waterforce_data *priv = seqf->private; seq_printf(seqf, "%u\n", priv->firmware_version); return 0; } DEFINE_SHOW_ATTRIBUTE(firmware_version); static void waterforce_debugfs_init(struct waterforce_data *priv) { char name[64]; if (!priv->firmware_version) return; /* There's nothing to show in debugfs */ scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev)); priv->debugfs = debugfs_create_dir(name, NULL); debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops); } static int waterforce_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct waterforce_data *priv; int ret; priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->hdev = hdev; hid_set_drvdata(hdev, priv); /* * Initialize priv->updated to STATUS_VALIDITY seconds in the past, making * the initial empty data invalid for waterforce_read() without the need for * a special case there. */ priv->updated = jiffies - msecs_to_jiffies(STATUS_VALIDITY); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid parse failed with %d\n", ret); return ret; } /* * Enable hidraw so existing user-space tools can continue to work. 
*/ ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hid hw start failed with %d\n", ret); return ret; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "hid hw open failed with %d\n", ret); goto fail_and_stop; } priv->buffer = devm_kzalloc(&hdev->dev, MAX_REPORT_LENGTH, GFP_KERNEL); if (!priv->buffer) { ret = -ENOMEM; goto fail_and_close; } mutex_init(&priv->status_report_request_mutex); mutex_init(&priv->buffer_lock); spin_lock_init(&priv->status_report_request_lock); init_completion(&priv->status_report_received); init_completion(&priv->fw_version_processed); hid_device_io_start(hdev); ret = waterforce_get_fw_ver(hdev); if (ret < 0) hid_warn(hdev, "fw version request failed with %d\n", ret); priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "waterforce", priv, &waterforce_chip_info, NULL); if (IS_ERR(priv->hwmon_dev)) { ret = PTR_ERR(priv->hwmon_dev); hid_err(hdev, "hwmon registration failed with %d\n", ret); goto fail_and_close; } waterforce_debugfs_init(priv); return 0; fail_and_close: hid_hw_close(hdev); fail_and_stop: hid_hw_stop(hdev); return ret; } static void waterforce_remove(struct hid_device *hdev) { struct waterforce_data *priv = hid_get_drvdata(hdev); debugfs_remove_recursive(priv->debugfs); hwmon_device_unregister(priv->hwmon_dev); hid_hw_close(hdev); hid_hw_stop(hdev); } static const struct hid_device_id waterforce_table[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GIGABYTE, USB_PRODUCT_ID_WATERFORCE) }, { } }; MODULE_DEVICE_TABLE(hid, waterforce_table); static struct hid_driver waterforce_driver = { .name = "waterforce", .id_table = waterforce_table, .probe = waterforce_probe, .remove = waterforce_remove, .raw_event = waterforce_raw_event, }; static int __init waterforce_init(void) { return hid_register_driver(&waterforce_driver); } static void __exit waterforce_exit(void) { hid_unregister_driver(&waterforce_driver); } /* When compiled into the kernel, initialize after the HID bus */ late_initcall(waterforce_init); module_exit(waterforce_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>"); MODULE_DESCRIPTION("Hwmon driver for Gigabyte AORUS Waterforce AIO coolers"); |
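For reference, a small user-space sketch of how waterforce_raw_event() above pulls sensor values out of a status report. The byte offsets and the duty-to-PWM scaling mirror the driver; the sample report bytes themselves are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define TEMP_OFF	0x0d	/* WATERFORCE_TEMP_SENSOR */
#define FAN_OFF		0x02	/* WATERFORCE_FAN_SPEED, little-endian u16 */
#define PUMP_OFF	0x05	/* WATERFORCE_PUMP_SPEED, little-endian u16 */
#define FDUTY_OFF	0x08	/* WATERFORCE_FAN_DUTY, 0-100 % */
#define PDUTY_OFF	0x09	/* WATERFORCE_PUMP_DUTY, 0-100 % */

/* Same byte order as get_unaligned_le16() in the driver */
static unsigned int le16(const uint8_t *p)
{
	return p[0] | (p[1] << 8);
}

int main(void)
{
	/* Hypothetical status report, starting with the 0x99 0xDA command echo */
	uint8_t d[16] = { 0x99, 0xda, 0x84, 0x03, 0x00, 0xd0, 0x07, 0x00,
			  0x32, 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00 };

	printf("coolant: %d millidegrees C\n", d[TEMP_OFF] * 1000);
	printf("fan:     %u RPM, pwm %u\n", le16(d + FAN_OFF),
	       (d[FDUTY_OFF] * 255 + 50) / 100);	/* DIV_ROUND_CLOSEST(duty * 255, 100) */
	printf("pump:    %u RPM, pwm %u\n", le16(d + PUMP_OFF),
	       (d[PDUTY_OFF] * 255 + 50) / 100);
	return 0;
}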
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TTY_BUFFER_H
#define _LINUX_TTY_BUFFER_H

#include <linux/atomic.h>
#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct tty_buffer {
	union {
		struct tty_buffer *next;
		struct llist_node free;
	};
	unsigned int used;
	unsigned int size;
	unsigned int commit;
	unsigned int lookahead;		/* Lazy update on recv, can become less than "read" */
	unsigned int read;
	bool flags;
	/* Data points here */
	u8 data[] __aligned(sizeof(unsigned long));
};

static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
	return b->data + ofs;
}

static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
	return char_buf_ptr(b, ofs) + b->size;
}

struct tty_bufhead {
	struct tty_buffer *head;	/* Queue head */
	struct work_struct work;
	struct mutex lock;
	atomic_t priority;
	struct tty_buffer sentinel;
	struct llist_head free;		/* Free queue head */
	atomic_t mem_used;		/* In-use buffers excluding free list */
	int mem_limit;
	struct tty_buffer *tail;	/* Active buffer */
};

/*
 * When a break, frame error, or parity error happens, these codes are
 * stuffed into the flags buffer.
 */
#define TTY_NORMAL	0
#define TTY_BREAK	1
#define TTY_FRAME	2
#define TTY_PARITY	3
#define TTY_OVERRUN	4

#endif
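A short sketch of the buffer layout implied by char_buf_ptr() and flag_buf_ptr(): a single allocation holds "size" data bytes followed by "size" flag bytes. The demo_* names are stand-ins for illustration, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

struct demo_buffer {
	unsigned int used;
	unsigned int size;
	unsigned char data[];	/* data[0..size-1], then flags[0..size-1] */
};

static unsigned char *demo_char_buf_ptr(struct demo_buffer *b, unsigned int ofs)
{
	return b->data + ofs;
}

static unsigned char *demo_flag_buf_ptr(struct demo_buffer *b, unsigned int ofs)
{
	/* The flag area starts right after the data area */
	return demo_char_buf_ptr(b, ofs) + b->size;
}

int main(void)
{
	unsigned int size = 8;
	struct demo_buffer *b = calloc(1, sizeof(*b) + 2 * size);

	if (!b)
		return 1;
	b->size = size;
	*demo_char_buf_ptr(b, 0) = 'A';
	*demo_flag_buf_ptr(b, 0) = 0;	/* TTY_NORMAL */
	printf("data[0]=%c flag[0]=%d\n",
	       *demo_char_buf_ptr(b, 0), *demo_flag_buf_ptr(b, 0));
	free(b);
	return 0;
}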
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for AVX assembler versions of Serpent Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>

#include "serpent-avx.h"
#include "ecb_cbc_helpers.h"

/* 8-way parallel cipher functions */
asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);

asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);

asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);

static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
				   const u8 *key, unsigned int keylen)
{
	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}

static int ecb_encrypt(struct skcipher_request *req)
{
	ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
	ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx);
	ECB_BLOCK(1, __serpent_encrypt);
	ECB_WALK_END();
}

static int ecb_decrypt(struct skcipher_request *req)
{
	ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
	ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx);
	ECB_BLOCK(1, __serpent_decrypt);
	ECB_WALK_END();
}

static int cbc_encrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1);
	CBC_ENC_BLOCK(__serpent_encrypt);
	CBC_WALK_END();
}

static int cbc_decrypt(struct skcipher_request *req)
{
	CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
	CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx);
	CBC_DEC_BLOCK(1, __serpent_decrypt);
	CBC_WALK_END();
}

static struct skcipher_alg serpent_algs[] = {
	{
		.base.cra_name		= "__ecb(serpent)",
		.base.cra_driver_name	= "__ecb-serpent-avx",
		.base.cra_priority	= 500,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= SERPENT_MIN_KEY_SIZE,
		.max_keysize		= SERPENT_MAX_KEY_SIZE,
		.setkey			= serpent_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(serpent)",
		.base.cra_driver_name	= "__cbc-serpent-avx",
		.base.cra_priority	= 500,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= SERPENT_MIN_KEY_SIZE,
		.max_keysize		= SERPENT_MAX_KEY_SIZE,
		.ivsize			= SERPENT_BLOCK_SIZE,
		.setkey			= serpent_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	},
};

static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

static int __init serpent_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } return simd_register_skciphers_compat(serpent_algs, ARRAY_SIZE(serpent_algs), serpent_simd_algs); } static void __exit serpent_exit(void) { simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), serpent_simd_algs); } module_init(serpent_init); module_exit(serpent_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("serpent"); |
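Roughly, the ECB_WALK_START()/ECB_BLOCK() macros used above expand to a walk like the following sketch: consume the input in 8-block chunks with the AVX routine while a full chunk remains, then finish the tail one block at a time. enc8() and enc1() are stand-ins for serpent_ecb_enc_8way_avx() and __serpent_encrypt(), and the skcipher walk plumbing is omitted.

#include <stddef.h>

#define BLOCK_SIZE	16	/* SERPENT_BLOCK_SIZE */
#define PARALLEL	8	/* SERPENT_PARALLEL_BLOCKS */

typedef void (*cipher_fn)(const void *ctx, unsigned char *dst, const unsigned char *src);

void ecb_walk(const void *ctx, unsigned char *dst, const unsigned char *src,
	      size_t nbytes, cipher_fn enc8, cipher_fn enc1)
{
	/* Wide pass: 8 blocks per call as long as a full chunk is available */
	while (nbytes >= PARALLEL * BLOCK_SIZE) {
		enc8(ctx, dst, src);
		src += PARALLEL * BLOCK_SIZE;
		dst += PARALLEL * BLOCK_SIZE;
		nbytes -= PARALLEL * BLOCK_SIZE;
	}
	/* Tail: remaining whole blocks one at a time */
	while (nbytes >= BLOCK_SIZE) {
		enc1(ctx, dst, src);
		src += BLOCK_SIZE;
		dst += BLOCK_SIZE;
		nbytes -= BLOCK_SIZE;
	}
}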
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Arvo driver for Linux
 *
 * Copyright (c) 2011 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Arvo is a gamer keyboard with 5 macro keys that can be configured in
 * 5 profiles.
*/ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-arvo.h" static ssize_t arvo_sysfs_show_mode_key(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_mode_key temp_buf; int retval; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_MODE_KEY, &temp_buf, sizeof(struct arvo_mode_key)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", temp_buf.state); } static ssize_t arvo_sysfs_set_mode_key(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_mode_key temp_buf; unsigned long state; int retval; retval = kstrtoul(buf, 10, &state); if (retval) return retval; temp_buf.command = ARVO_COMMAND_MODE_KEY; temp_buf.state = state; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_MODE_KEY, &temp_buf, sizeof(struct arvo_mode_key)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return size; } static DEVICE_ATTR(mode_key, 0660, arvo_sysfs_show_mode_key, arvo_sysfs_set_mode_key); static ssize_t arvo_sysfs_show_key_mask(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_key_mask temp_buf; int retval; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_KEY_MASK, &temp_buf, sizeof(struct arvo_key_mask)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", temp_buf.key_mask); } static ssize_t arvo_sysfs_set_key_mask(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_key_mask temp_buf; unsigned long key_mask; int retval; retval = kstrtoul(buf, 10, &key_mask); if (retval) return retval; temp_buf.command = ARVO_COMMAND_KEY_MASK; temp_buf.key_mask = key_mask; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_KEY_MASK, &temp_buf, sizeof(struct arvo_key_mask)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return size; } static DEVICE_ATTR(key_mask, 0660, arvo_sysfs_show_key_mask, arvo_sysfs_set_key_mask); /* retval is 1-5 on success, < 0 on error */ static int arvo_get_actual_profile(struct usb_device *usb_dev) { struct arvo_actual_profile temp_buf; int retval; retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_ACTUAL_PROFILE, &temp_buf, sizeof(struct arvo_actual_profile)); if (retval) return retval; return temp_buf.actual_profile; } static ssize_t arvo_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", arvo->actual_profile); } static ssize_t 
arvo_sysfs_set_actual_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_actual_profile temp_buf; unsigned long profile; int retval; retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile < 1 || profile > 5) return -EINVAL; temp_buf.command = ARVO_COMMAND_ACTUAL_PROFILE; temp_buf.actual_profile = profile; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_ACTUAL_PROFILE, &temp_buf, sizeof(struct arvo_actual_profile)); if (!retval) { arvo->actual_profile = profile; retval = size; } mutex_unlock(&arvo->arvo_lock); return retval; } static DEVICE_ATTR(actual_profile, 0660, arvo_sysfs_show_actual_profile, arvo_sysfs_set_actual_profile); static ssize_t arvo_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, command, buf, real_size); mutex_unlock(&arvo->arvo_lock); return (retval ? retval : real_size); } static ssize_t arvo_sysfs_read(struct file *fp, struct kobject *kobj, void *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&arvo->arvo_lock); return (retval ? 
retval : real_size); } static ssize_t arvo_sysfs_write_button(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return arvo_sysfs_write(fp, kobj, buf, off, count, sizeof(struct arvo_button), ARVO_COMMAND_BUTTON); } static const BIN_ATTR(button, 0220, NULL, arvo_sysfs_write_button, sizeof(struct arvo_button)); static ssize_t arvo_sysfs_read_info(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return arvo_sysfs_read(fp, kobj, buf, off, count, sizeof(struct arvo_info), ARVO_COMMAND_INFO); } static const BIN_ATTR(info, 0440, arvo_sysfs_read_info, NULL, sizeof(struct arvo_info)); static struct attribute *arvo_attrs[] = { &dev_attr_mode_key.attr, &dev_attr_key_mask.attr, &dev_attr_actual_profile.attr, NULL, }; static const struct bin_attribute *const arvo_bin_attributes[] = { &bin_attr_button, &bin_attr_info, NULL, }; static const struct attribute_group arvo_group = { .attrs = arvo_attrs, .bin_attrs_new = arvo_bin_attributes, }; static const struct attribute_group *arvo_groups[] = { &arvo_group, NULL, }; static const struct class arvo_class = { .name = "arvo", .dev_groups = arvo_groups, }; static int arvo_init_arvo_device_struct(struct usb_device *usb_dev, struct arvo_device *arvo) { int retval; mutex_init(&arvo->arvo_lock); retval = arvo_get_actual_profile(usb_dev); if (retval < 0) return retval; arvo->actual_profile = retval; return 0; } static int arvo_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct arvo_device *arvo; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) { hid_set_drvdata(hdev, NULL); return 0; } arvo = kzalloc(sizeof(*arvo), GFP_KERNEL); if (!arvo) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, arvo); retval = arvo_init_arvo_device_struct(usb_dev, arvo); if (retval) { hid_err(hdev, "couldn't init struct arvo_device\n"); goto exit_free; } retval = roccat_connect(&arvo_class, hdev, sizeof(struct arvo_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { arvo->chrdev_minor = retval; arvo->roccat_claimed = 1; } return 0; exit_free: kfree(arvo); return retval; } static void arvo_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct arvo_device *arvo; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) return; arvo = hid_get_drvdata(hdev); if (arvo->roccat_claimed) roccat_disconnect(arvo->chrdev_minor); kfree(arvo); } static int arvo_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = arvo_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install keyboard\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void arvo_remove(struct hid_device *hdev) { arvo_remove_specials(hdev); hid_hw_stop(hdev); } static void arvo_report_to_chrdev(struct arvo_device const *arvo, u8 const *data) { struct arvo_special_report const *special_report; struct arvo_roccat_report roccat_report; special_report = (struct 
arvo_special_report const *)data; roccat_report.profile = arvo->actual_profile; roccat_report.button = special_report->event & ARVO_SPECIAL_REPORT_EVENT_MASK_BUTTON; if ((special_report->event & ARVO_SPECIAL_REPORT_EVENT_MASK_ACTION) == ARVO_SPECIAL_REPORT_EVENT_ACTION_PRESS) roccat_report.action = ARVO_ROCCAT_REPORT_ACTION_PRESS; else roccat_report.action = ARVO_ROCCAT_REPORT_ACTION_RELEASE; roccat_report_event(arvo->chrdev_minor, (uint8_t const *)&roccat_report); } static int arvo_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct arvo_device *arvo = hid_get_drvdata(hdev); if (size != 3) return 0; if (arvo && arvo->roccat_claimed) arvo_report_to_chrdev(arvo, data); return 0; } static const struct hid_device_id arvo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) }, { } }; MODULE_DEVICE_TABLE(hid, arvo_devices); static struct hid_driver arvo_driver = { .name = "arvo", .id_table = arvo_devices, .probe = arvo_probe, .remove = arvo_remove, .raw_event = arvo_raw_event }; static int __init arvo_init(void) { int retval; retval = class_register(&arvo_class); if (retval) return retval; retval = hid_register_driver(&arvo_driver); if (retval) class_unregister(&arvo_class); return retval; } static void __exit arvo_exit(void) { hid_unregister_driver(&arvo_driver); class_unregister(&arvo_class); } module_init(arvo_init); module_exit(arvo_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Arvo driver"); MODULE_LICENSE("GPL v2"); |
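A small sketch of the event decode performed by arvo_report_to_chrdev() above. The mask and action constants below are illustrative placeholders (the real values live in hid-roccat-arvo.h); only the button/action split follows the driver.

#include <stdint.h>
#include <stdio.h>

#define DEMO_EVENT_MASK_BUTTON	0x0f	/* placeholder for ARVO_SPECIAL_REPORT_EVENT_MASK_BUTTON */
#define DEMO_EVENT_MASK_ACTION	0x10	/* placeholder for ARVO_SPECIAL_REPORT_EVENT_MASK_ACTION */
#define DEMO_EVENT_ACTION_PRESS	0x10	/* placeholder for ARVO_SPECIAL_REPORT_EVENT_ACTION_PRESS */

int main(void)
{
	uint8_t event = 0x13;	/* hypothetical raw event byte */
	uint8_t button = event & DEMO_EVENT_MASK_BUTTON;
	int press = (event & DEMO_EVENT_MASK_ACTION) == DEMO_EVENT_ACTION_PRESS;

	printf("button %u %s\n", button, press ? "pressed" : "released");
	return 0;
}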
// SPDX-License-Identifier: GPL-2.0
/*
 * HugeTLB Vmemmap Optimization (HVO)
 *
 * Copyright (c) 2020, ByteDance. All rights reserved.
 *
 * Author: Muchun Song <songmuchun@bytedance.com>
 *
 * See Documentation/mm/vmemmap_dedup.rst
 */
#define pr_fmt(fmt)	"HugeTLB: " fmt

#include <linux/pgtable.h>
#include <linux/moduleparam.h>
#include <linux/bootmem_info.h>
#include <linux/mmdebug.h>
#include <linux/pagewalk.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "hugetlb_vmemmap.h"

/**
 * struct vmemmap_remap_walk - walk vmemmap page table
 *
 * @remap_pte:	called for each lowest-level entry (PTE).
 * @nr_walked:	the number of walked pte.
 * @reuse_page:	the page which is reused for the tail vmemmap pages.
* @reuse_addr: the virtual address of the @reuse_page page. * @vmemmap_pages: the list head of the vmemmap pages that can be freed * or is mapped from. * @flags: used to modify behavior in vmemmap page table walking * operations. */ struct vmemmap_remap_walk { void (*remap_pte)(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk); unsigned long nr_walked; struct page *reuse_page; unsigned long reuse_addr; struct list_head *vmemmap_pages; /* Skip the TLB flush when we split the PMD */ #define VMEMMAP_SPLIT_NO_TLB_FLUSH BIT(0) /* Skip the TLB flush when we remap the PTE */ #define VMEMMAP_REMAP_NO_TLB_FLUSH BIT(1) /* synchronize_rcu() to avoid writes from page_ref_add_unless() */ #define VMEMMAP_SYNCHRONIZE_RCU BIT(2) unsigned long flags; }; static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start, struct vmemmap_remap_walk *walk) { pmd_t __pmd; int i; unsigned long addr = start; pte_t *pgtable; pgtable = pte_alloc_one_kernel(&init_mm); if (!pgtable) return -ENOMEM; pmd_populate_kernel(&init_mm, &__pmd, pgtable); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { pte_t entry, *pte; pgprot_t pgprot = PAGE_KERNEL; entry = mk_pte(head + i, pgprot); pte = pte_offset_kernel(&__pmd, addr); set_pte_at(&init_mm, addr, pte, entry); } spin_lock(&init_mm.page_table_lock); if (likely(pmd_leaf(*pmd))) { /* * Higher order allocations from buddy allocator must be able to * be treated as indepdenent small pages (as they can be freed * individually). */ if (!PageReserved(head)) split_page(head, get_order(PMD_SIZE)); /* Make pte visible before pmd. See comment in pmd_install(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); if (!(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH)) flush_tlb_kernel_range(start, start + PMD_SIZE); } else { pte_free_kernel(&init_mm, pgtable); } spin_unlock(&init_mm.page_table_lock); return 0; } static int vmemmap_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { int ret = 0; struct page *head; struct vmemmap_remap_walk *vmemmap_walk = walk->private; /* Only splitting, not remapping the vmemmap pages. */ if (!vmemmap_walk->remap_pte) walk->action = ACTION_CONTINUE; spin_lock(&init_mm.page_table_lock); head = pmd_leaf(*pmd) ? pmd_page(*pmd) : NULL; /* * Due to HugeTLB alignment requirements and the vmemmap * pages being at the start of the hotplugged memory * region in memory_hotplug.memmap_on_memory case. Checking * the vmemmap page associated with the first vmemmap page * if it is self-hosted is sufficient. * * [ hotplugged memory ] * [ section ][...][ section ] * [ vmemmap ][ usable memory ] * ^ | ^ | * +--+ | | * +------------------------+ */ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && unlikely(!vmemmap_walk->nr_walked)) { struct page *page = head ? head + pte_index(addr) : pte_page(ptep_get(pte_offset_kernel(pmd, addr))); if (PageVmemmapSelfHosted(page)) ret = -ENOTSUPP; } spin_unlock(&init_mm.page_table_lock); if (!head || ret) return ret; return vmemmap_split_pmd(pmd, head, addr & PMD_MASK, vmemmap_walk); } static int vmemmap_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct vmemmap_remap_walk *vmemmap_walk = walk->private; /* * The reuse_page is found 'first' in page table walking before * starting remapping. 
*/ if (!vmemmap_walk->reuse_page) vmemmap_walk->reuse_page = pte_page(ptep_get(pte)); else vmemmap_walk->remap_pte(pte, addr, vmemmap_walk); vmemmap_walk->nr_walked++; return 0; } static const struct mm_walk_ops vmemmap_remap_ops = { .pmd_entry = vmemmap_pmd_entry, .pte_entry = vmemmap_pte_entry, }; static int vmemmap_remap_range(unsigned long start, unsigned long end, struct vmemmap_remap_walk *walk) { int ret; VM_BUG_ON(!PAGE_ALIGNED(start | end)); mmap_read_lock(&init_mm); ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops, NULL, walk); mmap_read_unlock(&init_mm); if (ret) return ret; if (walk->remap_pte && !(walk->flags & VMEMMAP_REMAP_NO_TLB_FLUSH)) flush_tlb_kernel_range(start, end); return 0; } /* * Free a vmemmap page. A vmemmap page can be allocated from the memblock * allocator or buddy allocator. If the PG_reserved flag is set, it means * that it allocated from the memblock allocator, just free it via the * free_bootmem_page(). Otherwise, use __free_page(). */ static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { memmap_boot_pages_add(-1); free_bootmem_page(page); } else { memmap_pages_add(-1); __free_page(page); } } /* Free a list of the vmemmap pages */ static void free_vmemmap_page_list(struct list_head *list) { struct page *page, *next; list_for_each_entry_safe(page, next, list, lru) free_vmemmap_page(page); } static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { /* * Remap the tail pages as read-only to catch illegal write operation * to the tail pages. */ pgprot_t pgprot = PAGE_KERNEL_RO; struct page *page = pte_page(ptep_get(pte)); pte_t entry; /* Remapping the head page requires r/w */ if (unlikely(addr == walk->reuse_addr)) { pgprot = PAGE_KERNEL; list_del(&walk->reuse_page->lru); /* * Makes sure that preceding stores to the page contents from * vmemmap_remap_free() become visible before the set_pte_at() * write. */ smp_wmb(); } entry = mk_pte(walk->reuse_page, pgprot); list_add(&page->lru, walk->vmemmap_pages); set_pte_at(&init_mm, addr, pte, entry); } /* * How many struct page structs need to be reset. When we reuse the head * struct page, the special metadata (e.g. page->flags or page->mapping) * cannot copy to the tail struct page structs. The invalid value will be * checked in the free_tail_page_prepare(). In order to avoid the message * of "corrupted mapping in tail page". We need to reset at least 3 (one * head struct page struct and two tail struct page structs) struct page * structs. */ #define NR_RESET_STRUCT_PAGE 3 static inline void reset_struct_pages(struct page *start) { struct page *from = start + NR_RESET_STRUCT_PAGE; BUILD_BUG_ON(NR_RESET_STRUCT_PAGE * 2 > PAGE_SIZE / sizeof(struct page)); memcpy(start, from, sizeof(*from) * NR_RESET_STRUCT_PAGE); } static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { pgprot_t pgprot = PAGE_KERNEL; struct page *page; void *to; BUG_ON(pte_page(ptep_get(pte)) != walk->reuse_page); page = list_first_entry(walk->vmemmap_pages, struct page, lru); list_del(&page->lru); to = page_to_virt(page); copy_page(to, (void *)walk->reuse_addr); reset_struct_pages(to); /* * Makes sure that preceding stores to the page contents become visible * before the set_pte_at() write. 
*/ smp_wmb(); set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot)); } /** * vmemmap_remap_split - split the vmemmap virtual address range [@start, @end) * backing PMDs of the directmap into PTEs * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_split(unsigned long start, unsigned long end, unsigned long reuse) { struct vmemmap_remap_walk walk = { .remap_pte = NULL, .flags = VMEMMAP_SPLIT_NO_TLB_FLUSH, }; /* See the comment in the vmemmap_remap_free(). */ BUG_ON(start - reuse != PAGE_SIZE); return vmemmap_remap_range(reuse, end, &walk); } /** * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end) * to the page which @reuse is mapped to, then free vmemmap * which the range are mapped to. * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * @vmemmap_pages: list to deposit vmemmap pages to be freed. It is callers * responsibility to free pages. * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse, struct list_head *vmemmap_pages, unsigned long flags) { int ret; struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_remap_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, .flags = flags, }; int nid = page_to_nid((struct page *)reuse); gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; /* * Allocate a new head vmemmap page to avoid breaking a contiguous * block of struct page memory when freeing it back to page allocator * in free_vmemmap_page_list(). This will allow the likely contiguous * struct page backing memory to be kept contiguous and allowing for * more allocations of hugepages. Fallback to the currently * mapped head page in case should it fail to allocate. */ walk.reuse_page = alloc_pages_node(nid, gfp_mask, 0); if (walk.reuse_page) { copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); memmap_pages_add(1); } /* * In order to make remapping routine most efficient for the huge pages, * the routine of vmemmap page table walking has the following rules * (see more details from the vmemmap_pte_range()): * * - The range [@start, @end) and the range [@reuse, @reuse + PAGE_SIZE) * should be continuous. * - The @reuse address is part of the range [@reuse, @end) that we are * walking which is passed to vmemmap_remap_range(). * - The @reuse address is the first in the complete range. * * So we need to make sure that @start and @reuse meet the above rules. */ BUG_ON(start - reuse != PAGE_SIZE); ret = vmemmap_remap_range(reuse, end, &walk); if (ret && walk.nr_walked) { end = reuse + walk.nr_walked * PAGE_SIZE; /* * vmemmap_pages contains pages from the previous * vmemmap_remap_range call which failed. These * are pages which were removed from the vmemmap. * They will be restored in the following call. 
*/ walk = (struct vmemmap_remap_walk) { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = vmemmap_pages, .flags = 0, }; vmemmap_remap_range(reuse, end, &walk); } return ret; } static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, struct list_head *list) { gfp_t gfp_mask = GFP_KERNEL | __GFP_RETRY_MAYFAIL; unsigned long nr_pages = (end - start) >> PAGE_SHIFT; int nid = page_to_nid((struct page *)start); struct page *page, *next; int i; for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); if (!page) goto out; list_add(&page->lru, list); } memmap_pages_add(nr_pages); return 0; out: list_for_each_entry_safe(page, next, list, lru) __free_page(page); return -ENOMEM; } /** * vmemmap_remap_alloc - remap the vmemmap virtual address range [@start, end) * to the page which is from the @vmemmap_pages * respectively. * @start: start address of the vmemmap virtual address range that we want * to remap. * @end: end address of the vmemmap virtual address range that we want to * remap. * @reuse: reuse address. * @flags: modifications to vmemmap_remap_walk flags * * Return: %0 on success, negative error code otherwise. */ static int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, unsigned long flags) { LIST_HEAD(vmemmap_pages); struct vmemmap_remap_walk walk = { .remap_pte = vmemmap_restore_pte, .reuse_addr = reuse, .vmemmap_pages = &vmemmap_pages, .flags = flags, }; /* See the comment in the vmemmap_remap_free(). */ BUG_ON(start - reuse != PAGE_SIZE); if (alloc_vmemmap_page_list(start, end, &vmemmap_pages)) return -ENOMEM; return vmemmap_remap_range(reuse, end, &walk); } DEFINE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio, unsigned long flags) { int ret; unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; if (flags & VMEMMAP_SYNCHRONIZE_RCU) synchronize_rcu(); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * The pages which the vmemmap virtual address range [@vmemmap_start, * @vmemmap_end) are mapped to are freed to the buddy allocator, and * the range is mapped to the page which @vmemmap_reuse is mapped to. * When a HugeTLB page is freed to the buddy allocator, previously * discarded vmemmap pages must be allocated and remapping. */ ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse, flags); if (!ret) { folio_clear_hugetlb_vmemmap_optimized(folio); static_branch_dec(&hugetlb_optimize_vmemmap_key); } return ret; } /** * hugetlb_vmemmap_restore_folio - restore previously optimized (by * hugetlb_vmemmap_optimize_folio()) vmemmap pages which * will be reallocated and remapped. * @h: struct hstate. * @folio: the folio whose vmemmap pages will be restored. * * Return: %0 if @folio's vmemmap pages have been reallocated and remapped, * negative error code otherwise. 
*/ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { return __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_SYNCHRONIZE_RCU); } /** * hugetlb_vmemmap_restore_folios - restore vmemmap for every folio on the list. * @h: hstate. * @folio_list: list of folios. * @non_hvo_folios: Output list of folios for which vmemmap exists. * * Return: number of folios for which vmemmap was restored, or an error code * if an error was encountered restoring vmemmap for a folio. * Folios that have vmemmap are moved to the non_hvo_folios * list. Processing of entries stops when the first error is * encountered. The folio that experienced the error and all * non-processed folios will remain on folio_list. */ long hugetlb_vmemmap_restore_folios(const struct hstate *h, struct list_head *folio_list, struct list_head *non_hvo_folios) { struct folio *folio, *t_folio; long restored = 0; long ret = 0; unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { ret = __hugetlb_vmemmap_restore_folio(h, folio, flags); /* only need to synchronize_rcu() once for each batch */ flags &= ~VMEMMAP_SYNCHRONIZE_RCU; if (ret) break; restored++; } /* Add non-optimized folios to output list */ list_move(&folio->lru, non_hvo_folios); } if (restored) flush_tlb_all(); if (!ret) ret = restored; return ret; } /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio) { if (folio_test_hugetlb_vmemmap_optimized(folio)) return false; if (!READ_ONCE(vmemmap_optimize_enabled)) return false; if (!hugetlb_vmemmap_optimizable(h)) return false; return true; } static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio, struct list_head *vmemmap_pages, unsigned long flags) { int ret = 0; unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); if (!vmemmap_should_optimize_folio(h, folio)) return ret; static_branch_inc(&hugetlb_optimize_vmemmap_key); if (flags & VMEMMAP_SYNCHRONIZE_RCU) synchronize_rcu(); /* * Very Subtle * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed * immediately after remapping. As a result, subsequent accesses * and modifications to struct pages associated with the hugetlb * page could be to the OLD struct pages. Set the vmemmap optimized * flag here so that it is copied to the new head page. This keeps * the old and new struct pages in sync. * If there is an error during optimization, we will immediately FLUSH * the TLB and clear the flag below. */ folio_set_hugetlb_vmemmap_optimized(folio); vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end) * to the page which @vmemmap_reuse is mapped to. Add pages previously * mapping the range to vmemmap_pages list so that they can be freed by * the caller. */ ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages, flags); if (ret) { static_branch_dec(&hugetlb_optimize_vmemmap_key); folio_clear_hugetlb_vmemmap_optimized(folio); } return ret; } /** * hugetlb_vmemmap_optimize_folio - optimize @folio's vmemmap pages. * @h: struct hstate. 
* @folio: the folio whose vmemmap pages will be optimized. * * This function only tries to optimize @folio's vmemmap pages and does not * guarantee that the optimization will succeed after it returns. The caller * can use folio_test_hugetlb_vmemmap_optimized(@folio) to detect if @folio's * vmemmap pages have been optimized. */ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_SYNCHRONIZE_RCU); free_vmemmap_page_list(&vmemmap_pages); } static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *folio) { unsigned long vmemmap_start = (unsigned long)&folio->page, vmemmap_end; unsigned long vmemmap_reuse; if (!vmemmap_should_optimize_folio(h, folio)) return 0; vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); vmemmap_reuse = vmemmap_start; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; /* * Split PMDs on the vmemmap virtual address range [@vmemmap_start, * @vmemmap_end] */ return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse); } void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) { struct folio *folio; LIST_HEAD(vmemmap_pages); unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; list_for_each_entry(folio, folio_list, lru) { int ret = hugetlb_vmemmap_split_folio(h, folio); /* * Spliting the PMD requires allocating a page, thus lets fail * early once we encounter the first OOM. No point in retrying * as it can be dynamically done on remap with the memory * we get back from the vmemmap deduplication. */ if (ret == -ENOMEM) break; } flush_tlb_all(); list_for_each_entry(folio, folio_list, lru) { int ret; ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags); /* only need to synchronize_rcu() once for each batch */ flags &= ~VMEMMAP_SYNCHRONIZE_RCU; /* * Pages to be freed may have been accumulated. If we * encounter an ENOMEM, free what we have and try again. * This can occur in the case that both spliting fails * halfway and head page allocation also failed. In this * case __hugetlb_vmemmap_optimize_folio() would free memory * allowing more vmemmap remaps to occur. */ if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) { flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); INIT_LIST_HEAD(&vmemmap_pages); __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags); } } flush_tlb_all(); free_vmemmap_page_list(&vmemmap_pages); } static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { .procname = "hugetlb_optimize_vmemmap", .data = &vmemmap_optimize_enabled, .maxlen = sizeof(vmemmap_optimize_enabled), .mode = 0644, .proc_handler = proc_dobool, }, }; static int __init hugetlb_vmemmap_init(void) { const struct hstate *h; /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */ BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES); for_each_hstate(h) { if (hugetlb_vmemmap_optimizable(h)) { register_sysctl_init("vm", hugetlb_vmemmap_sysctls); break; } } return 0; } late_initcall(hugetlb_vmemmap_init); |
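/*
 * Illustrative userspace sketch, not kernel code: check whether HVO is
 * enabled through the "vm.hugetlb_optimize_vmemmap" sysctl registered above
 * and estimate how many vmemmap pages it can free per 2 MiB hugepage.  The
 * 4 KiB base page size and 64-byte struct page size are assumptions, as is
 * the single reserved base page implied by HUGETLB_VMEMMAP_RESERVE_SIZE;
 * adjust the constants for your configuration.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long base_page = 4096;		/* assumed PAGE_SIZE */
	const unsigned long struct_page_size = 64;	/* assumed sizeof(struct page) */
	const unsigned long hugepage = 2UL << 20;	/* 2 MiB hugepage */
	unsigned long vmemmap_pages, freeable;
	FILE *f = fopen("/proc/sys/vm/hugetlb_optimize_vmemmap", "r");
	int enabled = 0;

	if (f) {
		if (fscanf(f, "%d", &enabled) != 1)
			enabled = 0;
		fclose(f);
	}

	/* number of base pages holding the struct pages of one hugepage */
	vmemmap_pages = (hugepage / base_page) * struct_page_size / base_page;
	/* HVO keeps one reuse page and can free the remaining vmemmap pages */
	freeable = vmemmap_pages ? vmemmap_pages - 1 : 0;

	printf("hugetlb_optimize_vmemmap: %s\n", enabled ? "on" : "off");
	printf("vmemmap pages per 2 MiB hugepage: %lu (up to %lu freeable)\n",
	       vmemmap_pages, freeable);
	return 0;
}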
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_

#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/const.h>
#include <linux/limits.h>

#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable
#define __RENAME(x) __asm__(#x)

#define
FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) #define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) #define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ FIELD_PREP(GENMASK(7, 1), func)) /* Overridden by KUnit tests. */ #ifndef fortify_panic # define fortify_panic(func, write, avail, size, retfail) \ __fortify_panic(FORTIFY_REASON(func, write), avail, size) #endif #ifndef fortify_warn_once # define fortify_warn_once(x...) WARN_ONCE(x) #endif #define FORTIFY_READ 0 #define FORTIFY_WRITE 1 #define EACH_FORTIFY_FUNC(macro) \ macro(strncpy), \ macro(strnlen), \ macro(strlen), \ macro(strscpy), \ macro(strlcat), \ macro(strcat), \ macro(strncat), \ macro(memset), \ macro(memcpy), \ macro(memmove), \ macro(memscan), \ macro(memcmp), \ macro(memchr), \ macro(memchr_inv), \ macro(kmemdup), \ macro(strcpy), \ macro(UNKNOWN), #define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func enum fortify_func { EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) }; void __fortify_report(const u8 reason, const size_t avail, const size_t size); void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)"); void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("detected write beyond size of field (1st parameter); maybe use struct_group()?"); #define __compiletime_strlen(p) \ ({ \ char *__p = (char *)(p); \ size_t __ret = SIZE_MAX; \ const size_t __p_size = __member_size(p); \ if (__p_size != SIZE_MAX && \ __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ if (__builtin_constant_p(__p[__p_len]) && \ __p[__p_len] == '\0') \ __ret = __builtin_strlen(__p); \ } \ __ret; \ }) #if defined(__SANITIZE_ADDRESS__) #if !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) && !defined(CONFIG_GENERIC_ENTRY) extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset); extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); #elif defined(CONFIG_KASAN_GENERIC) extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(__asan_memset); extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(__asan_memmove); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(__asan_memcpy); #else /* CONFIG_KASAN_SW_TAGS */ extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(__hwasan_memset); extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(__hwasan_memmove); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(__hwasan_memcpy); #endif extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat); extern char *__underlying_strcpy(char *p, const char *q) 
__RENAME(strcpy); extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen); extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat); extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy); #else #if defined(__SANITIZE_MEMORY__) /* * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the * corresponding __msan_XXX functions. */ #include <linux/kmsan_string.h> #define __underlying_memcpy __msan_memcpy #define __underlying_memmove __msan_memmove #define __underlying_memset __msan_memset #else #define __underlying_memcpy __builtin_memcpy #define __underlying_memmove __builtin_memmove #define __underlying_memset __builtin_memset #endif #define __underlying_memchr __builtin_memchr #define __underlying_memcmp __builtin_memcmp #define __underlying_strcat __builtin_strcat #define __underlying_strcpy __builtin_strcpy #define __underlying_strlen __builtin_strlen #define __underlying_strncat __builtin_strncat #define __underlying_strncpy __builtin_strncpy #endif /** * unsafe_memcpy - memcpy implementation with no FORTIFY bounds checking * * @dst: Destination memory address to write to * @src: Source memory address to read from * @bytes: How many bytes to write to @dst from @src * @justification: Free-form text or comment describing why the use is needed * * This should be used for corner cases where the compiler cannot do the * right thing, or during transitions between APIs, etc. It should be used * very rarely, and includes a place for justification detailing where bounds * checking has happened, and why existing solutions cannot be employed. */ #define unsafe_memcpy(dst, src, bytes, justification) \ __underlying_memcpy(dst, src, bytes) /* * Clang's use of __builtin_*object_size() within inlines needs hinting via * __pass_*object_size(). The preference is to only ever use type 1 (member * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) #endif #define __compiletime_lessthan(bounds, length) ( \ __builtin_constant_p((bounds) < (length)) && \ (bounds) < (length) \ ) /** * strncpy - Copy a string to memory with non-guaranteed NUL padding * * @p: pointer to destination of copy * @q: pointer to NUL-terminated source string to copy * @size: bytes to write at @p * * If strlen(@q) >= @size, the copy of @q will stop after @size bytes, * and @p will NOT be NUL-terminated * * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes * will be written to @p until @size total bytes have been written. * * Do not use this function. While FORTIFY_SOURCE tries to avoid * over-reads of @q, it cannot defend against writing unterminated * results to @p. Using strncpy() remains ambiguous and fragile. 
* Instead, please choose an alternative, so that the expectation * of @p's contents is unambiguous: * * +--------------------+--------------------+------------+ * | **p** needs to be: | padded to **size** | not padded | * +====================+====================+============+ * | NUL-terminated | strscpy_pad() | strscpy() | * +--------------------+--------------------+------------+ * | not NUL-terminated | strtomem_pad() | strtomem() | * +--------------------+--------------------+------------+ * * Note strscpy*()'s differing return values for detecting truncation, * and strtomem*()'s expectation that the destination is marked with * __nonstring when it is a character array. * */ __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { const size_t p_size = __member_size(p); if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); /** * strnlen - Return bounded count of characters in a NUL-terminated string * * @p: pointer to NUL-terminated string to count. * @maxlen: maximum number of characters to count. * * Returns number of characters in @p (NOT including the final NUL), or * @maxlen, if no NUL has been found up to there. * */ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { const size_t p_size = __member_size(p); const size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) { /* If p is const, we can use its compile-time-known len. */ if (maxlen >= p_size) return p_len; } /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* * Defined after fortified strnlen to reuse it. However, it must still be * possible for strlen() to be used on compile-time strings for use in * static initializers (i.e. as a constant expression). */ /** * strlen - Return count of characters in a NUL-terminated string * * @p: pointer to NUL-terminated string to count. * * Do not use this function unless the string length is known at * compile-time. When @p is unterminated, this function may crash * or return unexpected counts that could lead to memory content * exposures. Prefer strnlen(). * * Returns number of characters in @p (NOT including the final NUL). * */ #define strlen(p) \ __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ __builtin_strlen(p), __fortify_strlen(p)) __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { const size_t p_size = __member_size(p); __kernel_size_t ret; /* Give up if we don't know how large p is. */ if (p_size == SIZE_MAX) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* Defined after fortified strnlen() to reuse it. */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); __FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. 
*/ const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t len; /* If we cannot get size of p and q default to call strscpy. */ if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strscpy(p, q, size); /* * If size can be known at compile time and is greater than * p_size, generate a compile time write overflow error. */ if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Short-circuit for compile-time known-safe lengths. */ if (__compiletime_lessthan(p_size, SIZE_MAX)) { len = __compiletime_strlen(q); if (len < SIZE_MAX && __compiletime_lessthan(len, size)) { __underlying_memcpy(p, q, len + 1); return len; } } /* * This call protects from read overflow, because len will default to q * length if it smaller than size. */ len = strnlen(q, size); /* * If len equals size, we will copy only size bytes which leads to * -E2BIG being returned. * Otherwise we will copy len + 1 because of the final '\O'. */ len = len == size ? size : len + 1; /* * Generate a runtime write overflow error if len is greater than * p_size. */ if (p_size < len) fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: * 1. Read overflow thanks to call to strnlen(). * 2. Write overflow thanks to above ifs. */ return __real_strscpy(p, q, len); } /* Defined after fortified strlen() to reuse it. */ extern size_t __real_strlcat(char *p, const char *q, size_t avail) __RENAME(strlcat); /** * strlcat - Append a string to an existing string * * @p: pointer to %NUL-terminated string to append to * @q: pointer to %NUL-terminated string to append from * @avail: Maximum bytes available in @p * * Appends %NUL-terminated string @q after the %NUL-terminated * string at @p, but will not write beyond @avail bytes total, * potentially truncating the copy from @q. @p will stay * %NUL-terminated only if a %NUL already existed within * the @avail bytes of @p. If so, the resulting number of * bytes copied from @q will be at most "@avail - strlen(@p) - 1". * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the sizes * of @p and @q are known to the compiler. Prefer building the * string with formatting, via scnprintf(), seq_buf, or similar. * * Returns total bytes that _would_ have been contained by @p * regardless of truncation, similar to snprintf(). If return * value is >= @avail, the string has been truncated. * */ __FORTIFY_INLINE size_t strlcat(char * const POS p, const char * const POS q, size_t avail) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t p_len, copy_len; size_t actual, wanted; /* Give up immediately if both buffer sizes are unknown. */ if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strlcat(p, q, avail); p_len = strnlen(p, avail); copy_len = strlen(q); wanted = actual = p_len + copy_len; /* Cannot append any more: report truncation. */ if (avail <= p_len) return wanted; /* Give up if string is already overflowed. */ if (p_size <= p_len) fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; actual = p_len + copy_len; } /* Give up if copy will overflow. */ if (p_size <= actual) fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; return wanted; } /* Defined after fortified strlcat() to reuse it. 
*/ /** * strcat - Append a string to an existing string * * @p: pointer to NUL-terminated string to append to * @q: pointer to NUL-terminated source string to append from * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the * destination buffer size is known to the compiler. Prefer * building the string with formatting, via scnprintf() or similar. * At the very least, use strncat(). * * Returns @p. * */ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); const size_t wanted = strlcat(p, q, p_size); if (p_size <= wanted) fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } /** * strncat - Append a string to an existing string * * @p: pointer to NUL-terminated string to append to * @q: pointer to source string to append from * @count: Maximum bytes to read from @q * * Appends at most @count bytes from @q (stopping at the first * NUL byte) after the NUL-terminated string at @p. @p will be * NUL-terminated. * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the sizes * of @p and @q are known to the compiler. Prefer building the * string with formatting, via scnprintf() or similar. * * Returns @p. * */ /* Defined after fortified strlen() and strnlen() to reuse them. */ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); total = p_len + copy_len + 1; if (p_size < total) fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } __FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { if (__builtin_constant_p(size)) { /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ if (__compiletime_lessthan(p_size_field, p_size) && __compiletime_lessthan(p_size, size)) __write_overflow(); /* Warn when write size is larger than dest field. */ if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); } /* * At this point, length argument may not be a constant expression, * so run-time bounds checking can be done where buffer sizes are * known. (This is not an "else" because the above checks may only * be compile-time warnings, and we want to still warn for run-time * overflows.) */ /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) 
*/ if (p_size != SIZE_MAX && p_size < size) fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ size_t __fortify_size = (size_t)(size); \ fortify_memset_chk(__fortify_size, p_size, p_size_field), \ __underlying_memset(p, c, __fortify_size); \ }) /* * __struct_size() vs __member_size() must be captured here to avoid * evaluating argument side-effects further into the macro layers. */ #ifndef CONFIG_KMSAN #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ __struct_size(p), __member_size(p)) #endif /* * To make sure the compiler can enforce protection against buffer overflows, * memcpy(), memmove(), and memset() must not be used beyond individual * struct members. If you need to copy across multiple members, please use * struct_group() to create a named mirror of an anonymous struct union. * (e.g. see struct sk_buff.) Read overflow checking is currently only * done when a write overflow is also present, or when building with W=1. * * Mitigation coverage matrix * Bounds checking at: * +-------+-------+-------+-------+ * | Compile time | Run time | * memcpy() argument sizes: | write | read | write | read | * dest source length +-------+-------+-------+-------+ * memcpy(known, known, constant) | y | y | n/a | n/a | * memcpy(known, unknown, constant) | y | n | n/a | V | * memcpy(known, known, dynamic) | n | n | B | B | * memcpy(known, unknown, dynamic) | n | n | B | V | * memcpy(unknown, known, constant) | n | y | V | n/a | * memcpy(unknown, unknown, constant) | n | n | V | V | * memcpy(unknown, known, dynamic) | n | n | V | B | * memcpy(unknown, unknown, dynamic) | n | n | V | V | * +-------+-------+-------+-------+ * * y = perform deterministic compile-time bounds checking * n = cannot perform deterministic compile-time bounds checking * n/a = no run-time bounds checking needed since compile-time deterministic * B = can perform run-time bounds checking (currently unimplemented) * V = vulnerable to run-time overflow (will need refactoring to solve) * */ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t p_size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, const u8 func) { if (__builtin_constant_p(size)) { /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ if (__compiletime_lessthan(p_size_field, p_size) && __compiletime_lessthan(p_size, size)) __write_overflow(); if (__compiletime_lessthan(q_size_field, q_size) && __compiletime_lessthan(q_size, size)) __read_overflow2(); /* Warn when write size argument larger than dest field. */ if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); /* * Warn for source field over-read when building with W=1 * or when an over-write happened, so both can be fixed at * the same time. */ if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || __compiletime_lessthan(p_size_field, size)) && __compiletime_lessthan(q_size_field, size)) __read_overflow2_field(q_size_field, size); } /* * At this point, length argument may not be a constant expression, * so run-time bounds checking can be done where buffer sizes are * known. (This is not an "else" because the above checks may only * be compile-time warnings, and we want to still warn for run-time * overflows.) 
*/ /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) fortify_panic(func, FORTIFY_WRITE, p_size, size, true); else if (q_size != SIZE_MAX && q_size < size) fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. * * Note the implementation of __builtin_*object_size() behaves * like sizeof() when not directly referencing a flexible * array member, which means there will be many bounds checks * that will appear at run-time, without a way for them to be * detected at compile-time (as can be done when the destination * is specifically the flexible array member). * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 */ if (p_size_field != SIZE_MAX && p_size != p_size_field && p_size_field < size) return true; return false; } /* * To work around what seems to be an optimizer bug, the macro arguments * need to have const copies or the values end up changed by the time they * reach fortify_warn_once(). See commit 6f7630b1b5bc ("fortify: Capture * __bos() results in const temp vars") for more details. */ #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ const size_t __fortify_size = (size_t)(size); \ const size_t __p_size = (p_size); \ const size_t __q_size = (q_size); \ const size_t __p_size_field = (p_size_field); \ const size_t __q_size_field = (q_size_field); \ /* Keep a mutable version of the size for the final copy. */ \ size_t __copy_size = __fortify_size; \ fortify_warn_once(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ /* Hide only the run-time size from value range tracking to */ \ /* silence compile-time false positive bounds warnings. */ \ if (!__builtin_constant_p(__copy_size)) \ OPTIMIZER_HIDE_VAR(__copy_size); \ __underlying_##op(p, q, __copy_size); \ }) /* * Notes about compile-time buffer size detection: * * With these types... * * struct middle { * u16 a; * u8 middle_buf[16]; * int b; * }; * struct end { * u16 a; * u8 end_buf[16]; * }; * struct flex { * int a; * u8 flex_buf[]; * }; * * void func(TYPE *ptr) { ... } * * Cases where destination size cannot be currently detected: * - the size of ptr's object (seemingly by design, gcc & clang fail): * __builtin_object_size(ptr, 1) == SIZE_MAX * - the size of flexible arrays in ptr's obj (by design, dynamic size): * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX * - the size of ANY array at the end of ptr's obj (gcc and clang bug): * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836 * * Cases where destination size is currently detected: * - the size of non-array members within ptr's object: * __builtin_object_size(ptr->a, 1) == 2 * - the size of non-flexible-array in the middle of ptr's obj: * __builtin_object_size(ptr->middle_buf, 1) == 16 * */ /* * __struct_size() vs __member_size() must be captured here to avoid * evaluating argument side-effects further into the macro layers. 
*/ #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ __struct_size(p), __struct_size(q), \ __member_size(p), __member_size(q), \ memcpy) #define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ __struct_size(p), __struct_size(q), \ __member_size(p), __member_size(q), \ memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { const size_t p_size = __struct_size(p); const size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } if (p_size < size) fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); else if (q_size < size) fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup_noprof) __realloc_size(2); __FORTIFY_INLINE void *kmemdup_noprof(const void * const POS0 p, size_t size, gfp_t gfp) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, __real_kmemdup(p, 0, gfp)); return __real_kmemdup(p, size, gfp); } #define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__)) /** * strcpy - Copy a string into another string buffer * * @p: pointer to destination of copy * @q: pointer to NUL-terminated source string to copy * * Do not use this function. While FORTIFY_SOURCE tries to avoid * overflows, this is only possible when the sizes of @q and @p are * known to the compiler. Prefer strscpy(), though note its different * return values for detecting truncation. * * Returns @p. * */ /* Defined after fortified strlen to reuse it. */ __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. */ if (__builtin_constant_p(p_size) && __builtin_constant_p(q_size) && p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strcpy(p, q); size = strlen(q) + 1; /* Compile-time check for const size overflow. 
 */
	if (__compiletime_lessthan(p_size, size))
		__write_overflow();

	/* Run-time check for dynamic size overflow. */
	if (p_size < size)
		fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p);
	__underlying_memcpy(p, q, size);
	return p;
}

/* Don't use these outside the FORTIFY_SOURCE implementation */
#undef __underlying_memchr
#undef __underlying_memcmp
#undef __underlying_strcat
#undef __underlying_strcpy
#undef __underlying_strlen
#undef __underlying_strncat
#undef __underlying_strncpy
#undef POS
#undef POS0

#endif /* _LINUX_FORTIFY_STRING_H_ */
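/*
 * Illustrative standalone sketch, not part of this header: the difference
 * between __builtin_object_size(p, 0) and __builtin_object_size(p, 1) that
 * __struct_size() and __member_size() build on.  Type 0 reports the bytes
 * remaining in the closest enclosing object, type 1 only the bytes of the
 * addressed member, which is what lets the fortified memcpy() flag
 * field-spanning writes.  Compile with GCC or Clang, preferably with
 * optimization enabled, for meaningful values.
 */
#include <stdio.h>

struct middle {
	unsigned short a;
	unsigned char middle_buf[16];
	int b;
};

int main(void)
{
	struct middle m;

	/* type 0: remaining size of the whole enclosing object */
	printf("bos(middle_buf, 0) = %zu\n",
	       __builtin_object_size(m.middle_buf, 0));
	/* type 1: size of the member itself (16 bytes here) */
	printf("bos(middle_buf, 1) = %zu\n",
	       __builtin_object_size(m.middle_buf, 1));
	return 0;
}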
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/page_ext.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
#include <linux/pgalloc_tag.h>

/*
 * struct page extension
 *
 * This is the feature to manage memory for extended data per page.
 *
 * Until now, we had to modify struct page itself to store extra data per
 * page, which requires rebuilding the kernel, a really time-consuming
 * process. Sometimes a rebuild is impossible due to third-party module
 * dependencies. Finally, enlarging struct page could cause unwanted changes
 * in system behaviour.
 *
 * This feature is intended to overcome the above problems. It allocates
 * memory for extended data per page in a certain place rather than in
 * struct page itself. This memory can be accessed by the accessor
 * functions provided by this code. During the boot process, it checks
 * whether allocation of a huge chunk of memory is needed or not. If not,
 * it avoids allocating memory at all. With this advantage, we can include
 * this feature in the kernel by default and can avoid rebuilds and the
 * related problems.
 *
 * To help these things to work well, there are two callbacks for clients.
One * is the need callback which is mandatory if user wants to avoid useless * memory allocation at boot-time. The other is optional, init callback, which * is used to do proper initialization after memory is allocated. * * The need callback is used to decide whether extended memory allocation is * needed or not. Sometimes users want to deactivate some features in this * boot and extra memory would be unnecessary. In this case, to avoid * allocating huge chunk of memory, each clients represent their need of * extra memory through the need callback. If one of the need callbacks * returns true, it means that someone needs extra memory so that * page extension core should allocates memory for page extension. If * none of need callbacks return true, memory isn't needed at all in this boot * and page extension core can skip to allocate memory. As result, * none of memory is wasted. * * When need callback returns true, page_ext checks if there is a request for * extra memory through size in struct page_ext_operations. If it is non-zero, * extra space is allocated for each page_ext entry and offset is returned to * user through offset in struct page_ext_operations. * * The init callback is used to do proper initialization after page extension * is completely initialized. In sparse memory system, extra memory is * allocated some time later than memmap is allocated. In other words, lifetime * of memory for page extension isn't same with memmap for struct page. * Therefore, clients can't store extra data until page extension is * initialized, even if pages are allocated and used freely. This could * cause inadequate state of extra data per page, so, to prevent it, client * can utilize this callback to initialize the state of it correctly. */ #ifdef CONFIG_SPARSEMEM #define PAGE_EXT_INVALID (0x1) #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) static bool need_page_idle(void) { return true; } static struct page_ext_operations page_idle_ops __initdata = { .need = need_page_idle, .need_shared_flags = true, }; #endif static struct page_ext_operations *page_ext_ops[] __initdata = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_MEM_ALLOC_PROFILING &page_alloc_tagging_ops, #endif #ifdef CONFIG_PAGE_TABLE_CHECK &page_table_check_ops, #endif }; unsigned long page_ext_size; static unsigned long total_usage; #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG /* * To ensure correct allocation tagging for pages, page_ext should be available * before the first page allocation. Otherwise early task stacks will be * allocated before page_ext initialization and missing tags will be flagged. 
*/ bool early_page_ext __meminitdata = true; #else bool early_page_ext __meminitdata; #endif static int __init setup_early_page_ext(char *str) { early_page_ext = true; return 0; } early_param("early_page_ext", setup_early_page_ext); static bool __init invoke_need_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); bool need = false; for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { if (page_ext_ops[i]->need_shared_flags) { page_ext_size = sizeof(struct page_ext); break; } } } for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { page_ext_ops[i]->offset = page_ext_size; page_ext_size += page_ext_ops[i]->size; need = true; } } return need; } static void __init invoke_init_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); for (i = 0; i < entries; i++) { if (page_ext_ops[i]->init) page_ext_ops[i]->init(); } } static inline struct page_ext *get_entry(void *base, unsigned long index) { return base + page_ext_size * index; } #ifndef CONFIG_SPARSEMEM void __init page_ext_init_flatmem_late(void) { invoke_init_callbacks(); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { pgdat->node_page_ext = NULL; } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned long index; struct page_ext *base; WARN_ON_ONCE(!rcu_read_lock_held()); base = NODE_DATA(page_to_nid(page))->node_page_ext; /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (unlikely(!base)) return NULL; index = pfn - round_down(node_start_pfn(page_to_nid(page)), MAX_ORDER_NR_PAGES); return get_entry(base, index); } static int __init alloc_node_page_ext(int nid) { struct page_ext *base; unsigned long table_size; unsigned long nr_pages; nr_pages = NODE_DATA(nid)->node_spanned_pages; if (!nr_pages) return 0; /* * Need extra space if node range is not aligned with * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm * checks buddy's status, range could be out of exact node range. 
*/ if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) || !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) nr_pages += MAX_ORDER_NR_PAGES; table_size = page_ext_size * nr_pages; base = memblock_alloc_try_nid( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!base) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } void __init page_ext_init_flatmem(void) { int nid, fail; if (!invoke_need_callbacks()) return; for_each_online_node(nid) { fail = alloc_node_page_ext(nid); if (fail) goto fail; } pr_info("allocated %ld bytes of page_ext\n", total_usage); return; fail: pr_crit("allocation of page_ext failed.\n"); panic("Out of memory"); } #else /* CONFIG_SPARSEMEM */ static bool page_ext_invalid(struct page_ext *page_ext) { return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID); } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); struct page_ext *page_ext = READ_ONCE(section->page_ext); WARN_ON_ONCE(!rcu_read_lock_held()); /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (page_ext_invalid(page_ext)) return NULL; return get_entry(page_ext, pfn); } static void *__meminit alloc_page_ext(size_t size, int nid) { gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) kmemleak_alloc(addr, size, 1, flags); else addr = vzalloc_node(size, nid); if (addr) memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } static int __meminit init_section_page_ext(unsigned long pfn, int nid) { struct mem_section *section; struct page_ext *base; unsigned long table_size; section = __pfn_to_section(pfn); if (section->page_ext) return 0; table_size = page_ext_size * PAGES_PER_SECTION; base = alloc_page_ext(table_size, nid); /* * The value stored in section->page_ext is (base - pfn) * and it does not point to the memory block allocated above, * causing kmemleak false positives. */ kmemleak_not_leak(base); if (!base) { pr_err("page ext allocation failure\n"); return -ENOMEM; } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. */ pfn &= PAGE_SECTION_MASK; section->page_ext = (void *)base - page_ext_size * pfn; total_usage += table_size; return 0; } static void free_page_ext(void *addr) { size_t table_size; struct page *page; table_size = page_ext_size * PAGES_PER_SECTION; memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { vfree(addr); } else { page = virt_to_page(addr); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } } static void __free_page_ext(unsigned long pfn) { struct mem_section *ms; struct page_ext *base; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; base = READ_ONCE(ms->page_ext); /* * page_ext here can be valid while doing the roll back * operation in online_page_ext(). 
*/ if (page_ext_invalid(base)) base = (void *)base - PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, NULL); base = get_entry(base, pfn); free_page_ext(base); } static void __invalidate_page_ext(unsigned long pfn) { struct mem_section *ms; void *val; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; val = (void *)ms->page_ext + PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, val); } static int __meminit online_page_ext(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; int fail = 0; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); if (nid == NUMA_NO_NODE) { /* * In this case, "nid" already exists and contains valid memory. * "start_pfn" passed to us is a pfn which is an arg for * online__pages(), and start_pfn should exist. */ nid = pfn_to_nid(start_pfn); VM_BUG_ON(!node_online(nid)); } for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) fail = init_section_page_ext(pfn, nid); if (!fail) return 0; /* rollback */ end = pfn - PAGES_PER_SECTION; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); return -ENOMEM; } static void __meminit offline_page_ext(unsigned long start_pfn, unsigned long nr_pages) { unsigned long start, end, pfn; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); /* * Freeing of page_ext is done in 3 steps to avoid * use-after-free of it: * 1) Traverse all the sections and mark their page_ext * as invalid. * 2) Wait for all the existing users of page_ext who * started before invalidation to finish. * 3) Free the page_ext. */ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __invalidate_page_ext(pfn); synchronize_rcu(); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); } static int __meminit page_ext_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; int ret = 0; switch (action) { case MEM_GOING_ONLINE: ret = online_page_ext(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; case MEM_OFFLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_CANCEL_ONLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } return notifier_from_errno(ret); } void __init page_ext_init(void) { unsigned long pfn; int nid; if (!invoke_need_callbacks()) return; for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; start_pfn = node_start_pfn(nid); end_pfn = node_end_pfn(nid); /* * start_pfn and end_pfn may not be aligned to SECTION and the * page->flags of out of node pages are not initialized. So we * scan [start_pfn, the biggest section's pfn < end_pfn) here. */ for (pfn = start_pfn; pfn < end_pfn; pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { if (!pfn_valid(pfn)) continue; /* * Nodes's pfns can be overlapping. * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... */ if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; cond_resched(); } } hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI); pr_info("allocated %ld bytes of page_ext\n", total_usage); invoke_init_callbacks(); return; oom: panic("Out of memory"); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { } #endif /** * page_ext_get() - Get the extended information for a page. * @page: The page we're interested in. 
* * Ensures that the page_ext will remain valid until page_ext_put() * is called. * * Return: NULL if no page_ext exists for this page. * Context: Any context. Caller may not sleep until they have called * page_ext_put(). */ struct page_ext *page_ext_get(const struct page *page) { struct page_ext *page_ext; rcu_read_lock(); page_ext = lookup_page_ext(page); if (!page_ext) { rcu_read_unlock(); return NULL; } return page_ext; } /** * page_ext_put() - Working with page extended information is done. * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. * * Return: None. * Context: Any context with corresponding page_ext_get() is called. */ void page_ext_put(struct page_ext *page_ext) { if (unlikely(!page_ext)) return; rcu_read_unlock(); } |
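/*
 * Hedged sketch (added for clarity, not part of page_ext.c): how a
 * hypothetical client of the need/init callback scheme described in the
 * header comment above could be wired up. "my_tag", "my_tag_ops" and the
 * helpers are invented names; struct page_ext_operations, page_ext_get()
 * and page_ext_put() are the interfaces used by the code above. A real
 * client must also appear in the page_ext_ops[] array so its need()
 * callback is consulted during boot.
 */
#include <linux/mm.h>
#include <linux/page_ext.h>

struct my_tag {
	unsigned long value;		/* private per-page data */
};

static bool need_my_tag(void)
{
	/* Returning true makes the core reserve .size bytes per page. */
	return true;
}

static void init_my_tag(void)
{
	/* Runs once page_ext storage is fully allocated and initialized. */
}

struct page_ext_operations my_tag_ops = {
	.size	= sizeof(struct my_tag),
	.need	= need_my_tag,
	.init	= init_my_tag,
	/* .offset is filled in by invoke_need_callbacks() above. */
};

static struct my_tag *get_my_tag(struct page_ext *page_ext)
{
	return (void *)page_ext + my_tag_ops.offset;
}

/* Typical lookup: page_ext_get()/page_ext_put() bracket the RCU access. */
static void my_tag_set(struct page *page, unsigned long value)
{
	struct page_ext *page_ext = page_ext_get(page);

	if (!page_ext)		/* no page_ext yet for this page */
		return;
	get_my_tag(page_ext)->value = value;
	page_ext_put(page_ext);
}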
2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 | // SPDX-License-Identifier: GPL-2.0-only /* * Driver for the Diolan DLN-2 USB adapter * * Copyright (c) 2014 Intel Corporation * * Derived from: * i2c-diolan-u2c.c 
* Copyright (c) 2010-2011 Ericsson AB */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/mfd/core.h> #include <linux/mfd/dln2.h> #include <linux/rculist.h> struct dln2_header { __le16 size; __le16 id; __le16 echo; __le16 handle; }; struct dln2_response { struct dln2_header hdr; __le16 result; }; #define DLN2_GENERIC_MODULE_ID 0x00 #define DLN2_GENERIC_CMD(cmd) DLN2_CMD(cmd, DLN2_GENERIC_MODULE_ID) #define CMD_GET_DEVICE_VER DLN2_GENERIC_CMD(0x30) #define CMD_GET_DEVICE_SN DLN2_GENERIC_CMD(0x31) #define DLN2_HW_ID 0x200 #define DLN2_USB_TIMEOUT 200 /* in ms */ #define DLN2_MAX_RX_SLOTS 16 #define DLN2_MAX_URBS 16 #define DLN2_RX_BUF_SIZE 512 enum dln2_handle { DLN2_HANDLE_EVENT = 0, /* don't change, hardware defined */ DLN2_HANDLE_CTRL, DLN2_HANDLE_GPIO, DLN2_HANDLE_I2C, DLN2_HANDLE_SPI, DLN2_HANDLE_ADC, DLN2_HANDLES }; /* * Receive context used between the receive demultiplexer and the transfer * routine. While sending a request the transfer routine will look for a free * receive context and use it to wait for a response and to receive the URB and * thus the response data. */ struct dln2_rx_context { /* completion used to wait for a response */ struct completion done; /* if non-NULL the URB contains the response */ struct urb *urb; /* if true then this context is used to wait for a response */ bool in_use; }; /* * Receive contexts for a particular DLN2 module (i2c, gpio, etc.). We use the * handle header field to identify the module in dln2_dev.mod_rx_slots and then * the echo header field to index the slots field and find the receive context * for a particular request. */ struct dln2_mod_rx_slots { /* RX slots bitmap */ DECLARE_BITMAP(bmap, DLN2_MAX_RX_SLOTS); /* used to wait for a free RX slot */ wait_queue_head_t wq; /* used to wait for an RX operation to complete */ struct dln2_rx_context slots[DLN2_MAX_RX_SLOTS]; /* avoid races between alloc/free_rx_slot and dln2_rx_transfer */ spinlock_t lock; }; struct dln2_dev { struct usb_device *usb_dev; struct usb_interface *interface; u8 ep_in; u8 ep_out; struct urb *rx_urb[DLN2_MAX_URBS]; void *rx_buf[DLN2_MAX_URBS]; struct dln2_mod_rx_slots mod_rx_slots[DLN2_HANDLES]; struct list_head event_cb_list; spinlock_t event_cb_lock; bool disconnect; int active_transfers; wait_queue_head_t disconnect_wq; spinlock_t disconnect_lock; }; struct dln2_event_cb_entry { struct list_head list; u16 id; struct platform_device *pdev; dln2_event_cb_t callback; }; int dln2_register_event_cb(struct platform_device *pdev, u16 id, dln2_event_cb_t event_cb) { struct dln2_dev *dln2 = dev_get_drvdata(pdev->dev.parent); struct dln2_event_cb_entry *i, *entry; unsigned long flags; int ret = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->id = id; entry->callback = event_cb; entry->pdev = pdev; spin_lock_irqsave(&dln2->event_cb_lock, flags); list_for_each_entry(i, &dln2->event_cb_list, list) { if (i->id == id) { ret = -EBUSY; break; } } if (!ret) list_add_rcu(&entry->list, &dln2->event_cb_list); spin_unlock_irqrestore(&dln2->event_cb_lock, flags); if (ret) kfree(entry); return ret; } EXPORT_SYMBOL(dln2_register_event_cb); void dln2_unregister_event_cb(struct platform_device *pdev, u16 id) { struct dln2_dev *dln2 = dev_get_drvdata(pdev->dev.parent); struct dln2_event_cb_entry *i; unsigned long flags; bool found = false; spin_lock_irqsave(&dln2->event_cb_lock, flags); list_for_each_entry(i, 
&dln2->event_cb_list, list) { if (i->id == id) { list_del_rcu(&i->list); found = true; break; } } spin_unlock_irqrestore(&dln2->event_cb_lock, flags); if (found) { synchronize_rcu(); kfree(i); } } EXPORT_SYMBOL(dln2_unregister_event_cb); /* * Returns true if a valid transfer slot is found. In this case the URB must not * be resubmitted immediately in dln2_rx as we need the data when dln2_transfer * is woke up. It will be resubmitted there. */ static bool dln2_transfer_complete(struct dln2_dev *dln2, struct urb *urb, u16 handle, u16 rx_slot) { struct device *dev = &dln2->interface->dev; struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[handle]; struct dln2_rx_context *rxc; unsigned long flags; bool valid_slot = false; if (rx_slot >= DLN2_MAX_RX_SLOTS) goto out; rxc = &rxs->slots[rx_slot]; spin_lock_irqsave(&rxs->lock, flags); if (rxc->in_use && !rxc->urb) { rxc->urb = urb; complete(&rxc->done); valid_slot = true; } spin_unlock_irqrestore(&rxs->lock, flags); out: if (!valid_slot) dev_warn(dev, "bad/late response %d/%d\n", handle, rx_slot); return valid_slot; } static void dln2_run_event_callbacks(struct dln2_dev *dln2, u16 id, u16 echo, void *data, int len) { struct dln2_event_cb_entry *i; rcu_read_lock(); list_for_each_entry_rcu(i, &dln2->event_cb_list, list) { if (i->id == id) { i->callback(i->pdev, echo, data, len); break; } } rcu_read_unlock(); } static void dln2_rx(struct urb *urb) { struct dln2_dev *dln2 = urb->context; struct dln2_header *hdr = urb->transfer_buffer; struct device *dev = &dln2->interface->dev; u16 id, echo, handle, size; u8 *data; int len; int err; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(dev, "urb shutting down with status %d\n", urb->status); return; default: dev_dbg(dev, "nonzero urb status received %d\n", urb->status); goto out; } if (urb->actual_length < sizeof(struct dln2_header)) { dev_err(dev, "short response: %d\n", urb->actual_length); goto out; } handle = le16_to_cpu(hdr->handle); id = le16_to_cpu(hdr->id); echo = le16_to_cpu(hdr->echo); size = le16_to_cpu(hdr->size); if (size != urb->actual_length) { dev_err(dev, "size mismatch: handle %x cmd %x echo %x size %d actual %d\n", handle, id, echo, size, urb->actual_length); goto out; } if (handle >= DLN2_HANDLES) { dev_warn(dev, "invalid handle %d\n", handle); goto out; } data = urb->transfer_buffer + sizeof(struct dln2_header); len = urb->actual_length - sizeof(struct dln2_header); if (handle == DLN2_HANDLE_EVENT) { unsigned long flags; spin_lock_irqsave(&dln2->event_cb_lock, flags); dln2_run_event_callbacks(dln2, id, echo, data, len); spin_unlock_irqrestore(&dln2->event_cb_lock, flags); } else { /* URB will be re-submitted in _dln2_transfer (free_rx_slot) */ if (dln2_transfer_complete(dln2, urb, handle, echo)) return; } out: err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) dev_err(dev, "failed to resubmit RX URB: %d\n", err); } static void *dln2_prep_buf(u16 handle, u16 cmd, u16 echo, const void *obuf, int *obuf_len, gfp_t gfp) { int len; void *buf; struct dln2_header *hdr; len = *obuf_len + sizeof(*hdr); buf = kmalloc(len, gfp); if (!buf) return NULL; hdr = (struct dln2_header *)buf; hdr->id = cpu_to_le16(cmd); hdr->size = cpu_to_le16(len); hdr->echo = cpu_to_le16(echo); hdr->handle = cpu_to_le16(handle); memcpy(buf + sizeof(*hdr), obuf, *obuf_len); *obuf_len = len; return buf; } static int dln2_send_wait(struct dln2_dev *dln2, u16 handle, u16 cmd, u16 echo, const void *obuf, int 
obuf_len) { int ret = 0; int len = obuf_len; void *buf; int actual; buf = dln2_prep_buf(handle, cmd, echo, obuf, &len, GFP_KERNEL); if (!buf) return -ENOMEM; ret = usb_bulk_msg(dln2->usb_dev, usb_sndbulkpipe(dln2->usb_dev, dln2->ep_out), buf, len, &actual, DLN2_USB_TIMEOUT); kfree(buf); return ret; } static bool find_free_slot(struct dln2_dev *dln2, u16 handle, int *slot) { struct dln2_mod_rx_slots *rxs; unsigned long flags; if (dln2->disconnect) { *slot = -ENODEV; return true; } rxs = &dln2->mod_rx_slots[handle]; spin_lock_irqsave(&rxs->lock, flags); *slot = find_first_zero_bit(rxs->bmap, DLN2_MAX_RX_SLOTS); if (*slot < DLN2_MAX_RX_SLOTS) { struct dln2_rx_context *rxc = &rxs->slots[*slot]; set_bit(*slot, rxs->bmap); rxc->in_use = true; } spin_unlock_irqrestore(&rxs->lock, flags); return *slot < DLN2_MAX_RX_SLOTS; } static int alloc_rx_slot(struct dln2_dev *dln2, u16 handle) { int ret; int slot; /* * No need to timeout here, the wait is bounded by the timeout in * _dln2_transfer. */ ret = wait_event_interruptible(dln2->mod_rx_slots[handle].wq, find_free_slot(dln2, handle, &slot)); if (ret < 0) return ret; return slot; } static void free_rx_slot(struct dln2_dev *dln2, u16 handle, int slot) { struct dln2_mod_rx_slots *rxs; struct urb *urb = NULL; unsigned long flags; struct dln2_rx_context *rxc; rxs = &dln2->mod_rx_slots[handle]; spin_lock_irqsave(&rxs->lock, flags); clear_bit(slot, rxs->bmap); rxc = &rxs->slots[slot]; rxc->in_use = false; urb = rxc->urb; rxc->urb = NULL; reinit_completion(&rxc->done); spin_unlock_irqrestore(&rxs->lock, flags); if (urb) { int err; struct device *dev = &dln2->interface->dev; err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) dev_err(dev, "failed to resubmit RX URB: %d\n", err); } wake_up_interruptible(&rxs->wq); } static int _dln2_transfer(struct dln2_dev *dln2, u16 handle, u16 cmd, const void *obuf, unsigned obuf_len, void *ibuf, unsigned *ibuf_len) { int ret = 0; int rx_slot; struct dln2_response *rsp; struct dln2_rx_context *rxc; struct device *dev = &dln2->interface->dev; const unsigned long timeout = msecs_to_jiffies(DLN2_USB_TIMEOUT); struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[handle]; int size; spin_lock(&dln2->disconnect_lock); if (!dln2->disconnect) dln2->active_transfers++; else ret = -ENODEV; spin_unlock(&dln2->disconnect_lock); if (ret) return ret; rx_slot = alloc_rx_slot(dln2, handle); if (rx_slot < 0) { ret = rx_slot; goto out_decr; } ret = dln2_send_wait(dln2, handle, cmd, rx_slot, obuf, obuf_len); if (ret < 0) { dev_err(dev, "USB write failed: %d\n", ret); goto out_free_rx_slot; } rxc = &rxs->slots[rx_slot]; ret = wait_for_completion_interruptible_timeout(&rxc->done, timeout); if (ret <= 0) { if (!ret) ret = -ETIMEDOUT; goto out_free_rx_slot; } else { ret = 0; } if (dln2->disconnect) { ret = -ENODEV; goto out_free_rx_slot; } /* if we got here we know that the response header has been checked */ rsp = rxc->urb->transfer_buffer; size = le16_to_cpu(rsp->hdr.size); if (size < sizeof(*rsp)) { ret = -EPROTO; goto out_free_rx_slot; } if (le16_to_cpu(rsp->result) > 0x80) { dev_dbg(dev, "%d received response with error %d\n", handle, le16_to_cpu(rsp->result)); ret = -EREMOTEIO; goto out_free_rx_slot; } if (!ibuf) goto out_free_rx_slot; if (*ibuf_len > size - sizeof(*rsp)) *ibuf_len = size - sizeof(*rsp); memcpy(ibuf, rsp + 1, *ibuf_len); out_free_rx_slot: free_rx_slot(dln2, handle, rx_slot); out_decr: spin_lock(&dln2->disconnect_lock); dln2->active_transfers--; spin_unlock(&dln2->disconnect_lock); if (dln2->disconnect) 
wake_up(&dln2->disconnect_wq); return ret; } int dln2_transfer(struct platform_device *pdev, u16 cmd, const void *obuf, unsigned obuf_len, void *ibuf, unsigned *ibuf_len) { struct dln2_platform_data *dln2_pdata; struct dln2_dev *dln2; u16 handle; dln2 = dev_get_drvdata(pdev->dev.parent); dln2_pdata = dev_get_platdata(&pdev->dev); handle = dln2_pdata->handle; return _dln2_transfer(dln2, handle, cmd, obuf, obuf_len, ibuf, ibuf_len); } EXPORT_SYMBOL(dln2_transfer); static int dln2_check_hw(struct dln2_dev *dln2) { int ret; __le32 hw_type; int len = sizeof(hw_type); ret = _dln2_transfer(dln2, DLN2_HANDLE_CTRL, CMD_GET_DEVICE_VER, NULL, 0, &hw_type, &len); if (ret < 0) return ret; if (len < sizeof(hw_type)) return -EREMOTEIO; if (le32_to_cpu(hw_type) != DLN2_HW_ID) { dev_err(&dln2->interface->dev, "Device ID 0x%x not supported\n", le32_to_cpu(hw_type)); return -ENODEV; } return 0; } static int dln2_print_serialno(struct dln2_dev *dln2) { int ret; __le32 serial_no; int len = sizeof(serial_no); struct device *dev = &dln2->interface->dev; ret = _dln2_transfer(dln2, DLN2_HANDLE_CTRL, CMD_GET_DEVICE_SN, NULL, 0, &serial_no, &len); if (ret < 0) return ret; if (len < sizeof(serial_no)) return -EREMOTEIO; dev_info(dev, "Diolan DLN2 serial %u\n", le32_to_cpu(serial_no)); return 0; } static int dln2_hw_init(struct dln2_dev *dln2) { int ret; ret = dln2_check_hw(dln2); if (ret < 0) return ret; return dln2_print_serialno(dln2); } static void dln2_free_rx_urbs(struct dln2_dev *dln2) { int i; for (i = 0; i < DLN2_MAX_URBS; i++) { usb_free_urb(dln2->rx_urb[i]); kfree(dln2->rx_buf[i]); } } static void dln2_stop_rx_urbs(struct dln2_dev *dln2) { int i; for (i = 0; i < DLN2_MAX_URBS; i++) usb_kill_urb(dln2->rx_urb[i]); } static void dln2_free(struct dln2_dev *dln2) { dln2_free_rx_urbs(dln2); usb_put_dev(dln2->usb_dev); kfree(dln2); } static int dln2_setup_rx_urbs(struct dln2_dev *dln2, struct usb_host_interface *hostif) { int i; const int rx_max_size = DLN2_RX_BUF_SIZE; for (i = 0; i < DLN2_MAX_URBS; i++) { dln2->rx_buf[i] = kmalloc(rx_max_size, GFP_KERNEL); if (!dln2->rx_buf[i]) return -ENOMEM; dln2->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!dln2->rx_urb[i]) return -ENOMEM; usb_fill_bulk_urb(dln2->rx_urb[i], dln2->usb_dev, usb_rcvbulkpipe(dln2->usb_dev, dln2->ep_in), dln2->rx_buf[i], rx_max_size, dln2_rx, dln2); } return 0; } static int dln2_start_rx_urbs(struct dln2_dev *dln2, gfp_t gfp) { struct device *dev = &dln2->interface->dev; int ret; int i; for (i = 0; i < DLN2_MAX_URBS; i++) { ret = usb_submit_urb(dln2->rx_urb[i], gfp); if (ret < 0) { dev_err(dev, "failed to submit RX URB: %d\n", ret); return ret; } } return 0; } enum { DLN2_ACPI_MATCH_GPIO = 0, DLN2_ACPI_MATCH_I2C = 1, DLN2_ACPI_MATCH_SPI = 2, DLN2_ACPI_MATCH_ADC = 3, }; static struct dln2_platform_data dln2_pdata_gpio = { .handle = DLN2_HANDLE_GPIO, }; static struct mfd_cell_acpi_match dln2_acpi_match_gpio = { .adr = DLN2_ACPI_MATCH_GPIO, }; /* Only one I2C port seems to be supported on current hardware */ static struct dln2_platform_data dln2_pdata_i2c = { .handle = DLN2_HANDLE_I2C, .port = 0, }; static struct mfd_cell_acpi_match dln2_acpi_match_i2c = { .adr = DLN2_ACPI_MATCH_I2C, }; /* Only one SPI port supported */ static struct dln2_platform_data dln2_pdata_spi = { .handle = DLN2_HANDLE_SPI, .port = 0, }; static struct mfd_cell_acpi_match dln2_acpi_match_spi = { .adr = DLN2_ACPI_MATCH_SPI, }; /* Only one ADC port supported */ static struct dln2_platform_data dln2_pdata_adc = { .handle = DLN2_HANDLE_ADC, .port = 0, }; static struct 
mfd_cell_acpi_match dln2_acpi_match_adc = { .adr = DLN2_ACPI_MATCH_ADC, }; static const struct mfd_cell dln2_devs[] = { { .name = "dln2-gpio", .acpi_match = &dln2_acpi_match_gpio, .platform_data = &dln2_pdata_gpio, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-i2c", .acpi_match = &dln2_acpi_match_i2c, .platform_data = &dln2_pdata_i2c, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-spi", .acpi_match = &dln2_acpi_match_spi, .platform_data = &dln2_pdata_spi, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-adc", .acpi_match = &dln2_acpi_match_adc, .platform_data = &dln2_pdata_adc, .pdata_size = sizeof(struct dln2_platform_data), }, }; static void dln2_stop(struct dln2_dev *dln2) { int i, j; /* don't allow starting new transfers */ spin_lock(&dln2->disconnect_lock); dln2->disconnect = true; spin_unlock(&dln2->disconnect_lock); /* cancel in progress transfers */ for (i = 0; i < DLN2_HANDLES; i++) { struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[i]; unsigned long flags; spin_lock_irqsave(&rxs->lock, flags); /* cancel all response waiters */ for (j = 0; j < DLN2_MAX_RX_SLOTS; j++) { struct dln2_rx_context *rxc = &rxs->slots[j]; if (rxc->in_use) complete(&rxc->done); } spin_unlock_irqrestore(&rxs->lock, flags); } /* wait for transfers to end */ wait_event(dln2->disconnect_wq, !dln2->active_transfers); dln2_stop_rx_urbs(dln2); } static void dln2_disconnect(struct usb_interface *interface) { struct dln2_dev *dln2 = usb_get_intfdata(interface); dln2_stop(dln2); mfd_remove_devices(&interface->dev); dln2_free(dln2); } static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; struct usb_endpoint_descriptor *epin; struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; int i, j; if (hostif->desc.bInterfaceNumber != 0) return -ENODEV; ret = usb_find_common_endpoints(hostif, &epin, &epout, NULL, NULL); if (ret) return ret; dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; dln2->ep_out = epout->bEndpointAddress; dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); init_waitqueue_head(&dln2->disconnect_wq); for (i = 0; i < DLN2_HANDLES; i++) { init_waitqueue_head(&dln2->mod_rx_slots[i].wq); spin_lock_init(&dln2->mod_rx_slots[i].lock); for (j = 0; j < DLN2_MAX_RX_SLOTS; j++) init_completion(&dln2->mod_rx_slots[i].slots[j].done); } spin_lock_init(&dln2->event_cb_lock); spin_lock_init(&dln2->disconnect_lock); INIT_LIST_HEAD(&dln2->event_cb_list); ret = dln2_setup_rx_urbs(dln2, hostif); if (ret) goto out_free; ret = dln2_start_rx_urbs(dln2, GFP_KERNEL); if (ret) goto out_stop_rx; ret = dln2_hw_init(dln2); if (ret < 0) { dev_err(dev, "failed to initialize hardware\n"); goto out_stop_rx; } ret = mfd_add_hotplug_devices(dev, dln2_devs, ARRAY_SIZE(dln2_devs)); if (ret != 0) { dev_err(dev, "failed to add mfd devices to core\n"); goto out_stop_rx; } return 0; out_stop_rx: dln2_stop_rx_urbs(dln2); out_free: dln2_free(dln2); return ret; } static int dln2_suspend(struct usb_interface *iface, pm_message_t message) { struct dln2_dev *dln2 = usb_get_intfdata(iface); dln2_stop(dln2); return 0; } static int dln2_resume(struct usb_interface *iface) { struct dln2_dev *dln2 = usb_get_intfdata(iface); dln2->disconnect = false; return dln2_start_rx_urbs(dln2, GFP_NOIO); } static const 
struct usb_device_id dln2_table[] = { { USB_DEVICE(0xa257, 0x2013) }, { } }; MODULE_DEVICE_TABLE(usb, dln2_table); static struct usb_driver dln2_driver = { .name = "dln2", .probe = dln2_probe, .disconnect = dln2_disconnect, .id_table = dln2_table, .suspend = dln2_suspend, .resume = dln2_resume, }; module_usb_driver(dln2_driver); MODULE_AUTHOR("Octavian Purdila <octavian.purdila@intel.com>"); MODULE_DESCRIPTION("Core driver for the Diolan DLN2 interface adapter"); MODULE_LICENSE("GPL v2"); |
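/*
 * Hedged sketch (added for clarity, not part of dln2.c): how one of the MFD
 * cell drivers sitting on top of this core might use the exported
 * dln2_transfer() and dln2_register_event_cb() entry points. The module,
 * command and event numbers and the probe function are invented for the
 * example; the callback signature is taken from how dln2_run_event_callbacks()
 * invokes it, and DLN2_CMD() is assumed to come from <linux/mfd/dln2.h>.
 */
#include <linux/platform_device.h>
#include <linux/mfd/dln2.h>

#define MY_MODULE_ID	0x05				/* hypothetical */
#define MY_CMD_GET_VAL	DLN2_CMD(0x40, MY_MODULE_ID)	/* hypothetical */
#define MY_EVENT_ID	DLN2_CMD(0x41, MY_MODULE_ID)	/* hypothetical */

/* Runs from the RX URB completion path, so it must not sleep. */
static void my_event_cb(struct platform_device *pdev, u16 echo,
			const void *data, int len)
{
	dev_dbg(&pdev->dev, "event echo %u, %d bytes\n", echo, len);
}

static int my_cell_probe(struct platform_device *pdev)
{
	__le32 val;
	unsigned int len = sizeof(val);
	int ret;

	/*
	 * Request/response: a free RX slot is allocated, the request goes out
	 * with the slot number in the echo field, and the call blocks until
	 * dln2_rx() completes that slot or DLN2_USB_TIMEOUT expires.
	 */
	ret = dln2_transfer(pdev, MY_CMD_GET_VAL, NULL, 0, &val, &len);
	if (ret < 0)
		return ret;

	/* Unsolicited messages carrying MY_EVENT_ID are routed to the
	 * callback registered here. */
	return dln2_register_event_cb(pdev, MY_EVENT_ID, my_event_cb);
}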
8 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | /* SPDX-License-Identifier: GPL-2.0 */ /* * fs-verity: read-only file-based authenticity protection * * This header declares the interface between the fs/verity/ support layer and * filesystems that support fs-verity. * * Copyright 2019 Google LLC */ #ifndef _LINUX_FSVERITY_H #define _LINUX_FSVERITY_H #include <linux/fs.h> #include <linux/mm.h> #include <crypto/hash_info.h> #include <crypto/sha2.h> #include <uapi/linux/fsverity.h> /* * Largest digest size among all hash algorithms supported by fs-verity. * Currently assumed to be <= size of fsverity_descriptor::root_hash. */ #define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE /* Arbitrary limit to bound the kmalloc() size. Can be changed. */ #define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 /* Verity operations for filesystems */ struct fsverity_operations { /** * Begin enabling verity on the given file. * * @filp: a readonly file descriptor for the file * * The filesystem must do any needed filesystem-specific preparations * for enabling verity, e.g. evicting inline data. It also must return * -EBUSY if verity is already being enabled on the given file. * * i_rwsem is held for write. * * Return: 0 on success, -errno on failure */ int (*begin_enable_verity)(struct file *filp); /** * End enabling verity on the given file. * * @filp: a readonly file descriptor for the file * @desc: the verity descriptor to write, or NULL on failure * @desc_size: size of verity descriptor, or 0 on failure * @merkle_tree_size: total bytes the Merkle tree took up * * If desc == NULL, then enabling verity failed and the filesystem only * must do any necessary cleanups. Else, it must also store the given * verity descriptor to a fs-specific location associated with the inode * and do any fs-specific actions needed to mark the inode as a verity * inode, e.g. setting a bit in the on-disk inode. The filesystem is * also responsible for setting the S_VERITY flag in the VFS inode. * * i_rwsem is held for write, but it may have been dropped between * ->begin_enable_verity() and ->end_enable_verity(). * * Return: 0 on success, -errno on failure */ int (*end_enable_verity)(struct file *filp, const void *desc, size_t desc_size, u64 merkle_tree_size); /** * Get the verity descriptor of the given inode. 
* * @inode: an inode with the S_VERITY flag set * @buf: buffer in which to place the verity descriptor * @bufsize: size of @buf, or 0 to retrieve the size only * * If bufsize == 0, then the size of the verity descriptor is returned. * Otherwise the verity descriptor is written to 'buf' and its actual * size is returned; -ERANGE is returned if it's too large. This may be * called by multiple processes concurrently on the same inode. * * Return: the size on success, -errno on failure */ int (*get_verity_descriptor)(struct inode *inode, void *buf, size_t bufsize); /** * Read a Merkle tree page of the given inode. * * @inode: the inode * @index: 0-based index of the page within the Merkle tree * @num_ra_pages: The number of Merkle tree pages that should be * prefetched starting at @index if the page at @index * isn't already cached. Implementations may ignore this * argument; it's only a performance optimization. * * This can be called at any time on an open verity file. It may be * called by multiple processes concurrently, even with the same page. * * Note that this must retrieve a *page*, not necessarily a *block*. * * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, pgoff_t index, unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. * * @inode: the inode for which the Merkle tree is being built * @buf: the Merkle tree block to write * @pos: the position of the block in the Merkle tree (in bytes) * @size: the Merkle tree block size (in bytes) * * This is only called between ->begin_enable_verity() and * ->end_enable_verity(). * * Return: 0 on success, -errno on failure */ int (*write_merkle_tree_block)(struct inode *inode, const void *buf, u64 pos, unsigned int size); }; #ifdef CONFIG_FS_VERITY static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fsverity_set_info(). * I.e., another task may publish ->i_verity_info concurrently, * executing a RELEASE barrier. We need to use smp_load_acquire() here * to safely ACQUIRE the memory the other task published. */ return smp_load_acquire(&inode->i_verity_info); } /* enable.c */ int fsverity_ioctl_enable(struct file *filp, const void __user *arg); /* measure.c */ int fsverity_ioctl_measure(struct file *filp, void __user *arg); int fsverity_get_digest(struct inode *inode, u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE], u8 *alg, enum hash_algo *halg); /* open.c */ int __fsverity_file_open(struct inode *inode, struct file *filp); int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); void __fsverity_cleanup_inode(struct inode *inode); /** * fsverity_cleanup_inode() - free the inode's verity info, if present * @inode: an inode being evicted * * Filesystems must call this on inode eviction to free ->i_verity_info. 
*/ static inline void fsverity_cleanup_inode(struct inode *inode) { if (inode->i_verity_info) __fsverity_cleanup_inode(inode); } /* read_metadata.c */ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg); /* verify.c */ bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset); void fsverity_verify_bio(struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); #else /* !CONFIG_FS_VERITY */ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { return NULL; } /* enable.c */ static inline int fsverity_ioctl_enable(struct file *filp, const void __user *arg) { return -EOPNOTSUPP; } /* measure.c */ static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fsverity_get_digest(struct inode *inode, u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE], u8 *alg, enum hash_algo *halg) { /* * fsverity is not enabled in the kernel configuration, so always report * that the file doesn't have fsverity enabled (digest size 0). */ return 0; } /* open.c */ static inline int __fsverity_file_open(struct inode *inode, struct file *filp) { return -EOPNOTSUPP; } static inline int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } static inline void fsverity_cleanup_inode(struct inode *inode) { } /* read_metadata.c */ static inline int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg) { return -EOPNOTSUPP; } /* verify.c */ static inline bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset) { WARN_ON_ONCE(1); return false; } static inline void fsverity_verify_bio(struct bio *bio) { WARN_ON_ONCE(1); } static inline void fsverity_enqueue_verify_work(struct work_struct *work) { WARN_ON_ONCE(1); } #endif /* !CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct folio *folio) { return fsverity_verify_blocks(folio, folio_size(folio), 0); } static inline bool fsverity_verify_page(struct page *page) { return fsverity_verify_blocks(page_folio(page), PAGE_SIZE, 0); } /** * fsverity_active() - do reads from the inode need to go through fs-verity? * @inode: inode to check * * This checks whether ->i_verity_info has been set. * * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) * * Return: true if reads need to go through fs-verity, otherwise false */ static inline bool fsverity_active(const struct inode *inode) { return fsverity_get_info(inode) != NULL; } /** * fsverity_file_open() - prepare to open a verity file * @inode: the inode being opened * @filp: the struct file being set up * * When opening a verity file, deny the open if it is for writing. Otherwise, * set up the inode's ->i_verity_info if not already done. * * When combined with fscrypt, this must be called after fscrypt_file_open(). * Otherwise, we won't have the key set up to decrypt the verity metadata. 
* * Return: 0 on success, -errno on failure */ static inline int fsverity_file_open(struct inode *inode, struct file *filp) { if (IS_VERITY(inode)) return __fsverity_file_open(inode, filp); return 0; } /** * fsverity_prepare_setattr() - prepare to change a verity inode's attributes * @dentry: dentry through which the inode is being changed * @attr: attributes to change * * Verity files are immutable, so deny truncates. This isn't covered by the * open-time check because sys_truncate() takes a path, not a file descriptor. * * Return: 0 on success, -errno on failure */ static inline int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr) { if (IS_VERITY(d_inode(dentry))) return __fsverity_prepare_setattr(dentry, attr); return 0; } #endif /* _LINUX_FSVERITY_H */ |
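/*
 * Hedged sketch (added for clarity, not part of this header): roughly where
 * a filesystem is expected to call the helpers declared above, following the
 * kerneldoc comments. "myfs" and its callbacks are invented; only the
 * fsverity_* calls come from this header.
 */
#include <linux/fs.h>
#include <linux/fsverity.h>
#include <linux/pagemap.h>

static int myfs_open(struct inode *inode, struct file *filp)
{
	int err;

	/* Denies writes to verity files and sets up ->i_verity_info. */
	err = fsverity_file_open(inode, filp);
	if (err)
		return err;
	return generic_file_open(inode, filp);
}

static int myfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			struct iattr *attr)
{
	int err;

	/* Rejects truncation, since verity files are immutable. */
	err = fsverity_prepare_setattr(dentry, attr);
	if (err)
		return err;
	/* ... filesystem-specific attribute handling ... */
	return 0;
}

static void myfs_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	/* Frees ->i_verity_info, if any. */
	fsverity_cleanup_inode(inode);
}

/* Read completion: verity data must be verified before it is exposed. */
static void myfs_finish_read(struct folio *folio)
{
	struct inode *inode = folio->mapping->host;

	if (fsverity_active(inode) && !fsverity_verify_folio(folio))
		return;			/* leave the folio !uptodate */
	folio_mark_uptodate(folio);
}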
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 | // SPDX-License-Identifier: GPL-2.0-or-later /* * dlmfs.c * * Code which implements the kernel side of a minimal userspace * interface to our DLM. This file handles the virtual file system * used for communication with userspace. Credit should go to ramfs, * which was a template for the fs side of this module. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. */ /* Simple VFS hooks based on: */ /* * Resizable simple ram filesystem for Linux. * * Copyright (C) 2000 Linus Torvalds. * 2000 Transmeta Corp. 
*/ #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/pagemap.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/poll.h> #include <linux/uaccess.h> #include "../stackglue.h" #include "userdlm.h" #define MLOG_MASK_PREFIX ML_DLMFS #include "../cluster/masklog.h" static const struct super_operations dlmfs_ops; static const struct file_operations dlmfs_file_operations; static const struct inode_operations dlmfs_dir_inode_operations; static const struct inode_operations dlmfs_root_inode_operations; static const struct inode_operations dlmfs_file_inode_operations; static struct kmem_cache *dlmfs_inode_cache; struct workqueue_struct *user_dlm_worker; /* * These are the ABI capabilities of dlmfs. * * Over time, dlmfs has added some features that were not part of the * initial ABI. Unfortunately, some of these features are not detectable * via standard usage. For example, Linux's default poll always returns * EPOLLIN, so there is no way for a caller of poll(2) to know when dlmfs * added poll support. Instead, we provide this list of new capabilities. * * Capabilities is a read-only attribute. We do it as a module parameter * so we can discover it whether dlmfs is built in, loaded, or even not * loaded. * * The ABI features are local to this machine's dlmfs mount. This is * distinct from the locking protocol, which is concerned with inter-node * interaction. * * Capabilities: * - bast : EPOLLIN against the file descriptor of a held lock * signifies a bast fired on the lock. */ #define DLMFS_CAPABILITIES "bast stackglue" static int param_set_dlmfs_capabilities(const char *val, const struct kernel_param *kp) { printk(KERN_ERR "%s: readonly parameter\n", kp->name); return -EINVAL; } static int param_get_dlmfs_capabilities(char *buffer, const struct kernel_param *kp) { return sysfs_emit(buffer, DLMFS_CAPABILITIES); } module_param_call(capabilities, param_set_dlmfs_capabilities, param_get_dlmfs_capabilities, NULL, 0444); MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); /* * decodes a set of open flags into a valid lock level and a set of flags. * returns < 0 if we have invalid flags * flags which mean something to us: * O_RDONLY -> PRMODE level * O_WRONLY -> EXMODE level * * O_NONBLOCK -> NOQUEUE */ static int dlmfs_decode_open_flags(int open_flags, int *level, int *flags) { if (open_flags & (O_WRONLY|O_RDWR)) *level = DLM_LOCK_EX; else *level = DLM_LOCK_PR; *flags = 0; if (open_flags & O_NONBLOCK) *flags |= DLM_LKF_NOQUEUE; return 0; } static int dlmfs_file_open(struct inode *inode, struct file *file) { int status, level, flags; struct dlmfs_filp_private *fp = NULL; struct dlmfs_inode_private *ip; if (S_ISDIR(inode->i_mode)) BUG(); mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, file->f_flags); status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); if (status < 0) goto bail; /* We don't want to honor O_APPEND at read/write time as it * doesn't make sense for LVB writes. */ file->f_flags &= ~O_APPEND; fp = kmalloc(sizeof(*fp), GFP_NOFS); if (!fp) { status = -ENOMEM; goto bail; } fp->fp_lock_level = level; ip = DLMFS_I(inode); status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); if (status < 0) { /* this is a strange error to return here but I want * to be able userspace to be able to distinguish a * valid lock request from one that simply couldn't be * granted. 
*/ if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN) status = -ETXTBSY; kfree(fp); goto bail; } file->private_data = fp; bail: return status; } static int dlmfs_file_release(struct inode *inode, struct file *file) { int level; struct dlmfs_inode_private *ip = DLMFS_I(inode); struct dlmfs_filp_private *fp = file->private_data; if (S_ISDIR(inode->i_mode)) BUG(); mlog(0, "close called on inode %lu\n", inode->i_ino); if (fp) { level = fp->fp_lock_level; if (level != DLM_LOCK_IV) user_dlm_cluster_unlock(&ip->ip_lockres, level); kfree(fp); file->private_data = NULL; } return 0; } /* * We do ->setattr() just to override size changes. Our size is the size * of the LVB and nothing else. */ static int dlmfs_file_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = d_inode(dentry); attr->ia_valid &= ~ATTR_SIZE; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } static __poll_t dlmfs_file_poll(struct file *file, poll_table *wait) { __poll_t event = 0; struct inode *inode = file_inode(file); struct dlmfs_inode_private *ip = DLMFS_I(inode); poll_wait(file, &ip->ip_lockres.l_event, wait); spin_lock(&ip->ip_lockres.l_lock); if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) event = EPOLLIN | EPOLLRDNORM; spin_unlock(&ip->ip_lockres.l_lock); return event; } static ssize_t dlmfs_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { char lvb[DLM_LVB_LEN]; if (!user_dlm_read_lvb(file_inode(file), lvb)) return 0; return simple_read_from_buffer(buf, count, ppos, lvb, sizeof(lvb)); } static ssize_t dlmfs_file_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { char lvb_buf[DLM_LVB_LEN]; int bytes_left; struct inode *inode = file_inode(filp); mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", inode->i_ino, count, *ppos); if (*ppos >= DLM_LVB_LEN) return -ENOSPC; /* don't write past the lvb */ if (count > DLM_LVB_LEN - *ppos) count = DLM_LVB_LEN - *ppos; if (!count) return 0; bytes_left = copy_from_user(lvb_buf, buf, count); count -= bytes_left; if (count) user_dlm_write_lvb(inode, lvb_buf, count); *ppos = *ppos + count; mlog(0, "wrote %zu bytes\n", count); return count; } static void dlmfs_init_once(void *foo) { struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; ip->ip_conn = NULL; ip->ip_parent = NULL; inode_init_once(&ip->ip_vfs_inode); } static struct inode *dlmfs_alloc_inode(struct super_block *sb) { struct dlmfs_inode_private *ip; ip = alloc_inode_sb(sb, dlmfs_inode_cache, GFP_NOFS); if (!ip) return NULL; return &ip->ip_vfs_inode; } static void dlmfs_free_inode(struct inode *inode) { kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } static void dlmfs_evict_inode(struct inode *inode) { int status; struct dlmfs_inode_private *ip; struct user_lock_res *lockres; int teardown; clear_inode(inode); mlog(0, "inode %lu\n", inode->i_ino); ip = DLMFS_I(inode); lockres = &ip->ip_lockres; if (S_ISREG(inode->i_mode)) { spin_lock(&lockres->l_lock); teardown = !!(lockres->l_flags & USER_LOCK_IN_TEARDOWN); spin_unlock(&lockres->l_lock); if (!teardown) { status = user_dlm_destroy_lock(lockres); if (status < 0) mlog_errno(status); } iput(ip->ip_parent); goto clear_fields; } mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn); /* we must be a directory. If required, lets unregister the * dlm context now. 
*/ if (ip->ip_conn) user_dlm_unregister(ip->ip_conn); clear_fields: ip->ip_parent = NULL; ip->ip_conn = NULL; } static struct inode *dlmfs_get_root_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); umode_t mode = S_IFDIR | 0755; if (inode) { inode->i_ino = get_next_ino(); inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); simple_inode_init_ts(inode); inc_nlink(inode); inode->i_fop = &simple_dir_operations; inode->i_op = &dlmfs_root_inode_operations; } return inode; } static struct inode *dlmfs_get_inode(struct inode *parent, struct dentry *dentry, umode_t mode) { struct super_block *sb = parent->i_sb; struct inode * inode = new_inode(sb); struct dlmfs_inode_private *ip; if (!inode) return NULL; inode->i_ino = get_next_ino(); inode_init_owner(&nop_mnt_idmap, inode, parent, mode); simple_inode_init_ts(inode); ip = DLMFS_I(inode); ip->ip_conn = DLMFS_I(parent)->ip_conn; switch (mode & S_IFMT) { default: /* for now we don't support anything other than * directories and regular files. */ BUG(); break; case S_IFREG: inode->i_op = &dlmfs_file_inode_operations; inode->i_fop = &dlmfs_file_operations; i_size_write(inode, DLM_LVB_LEN); user_dlm_lock_res_init(&ip->ip_lockres, dentry); /* released at clear_inode time, this insures that we * get to drop the dlm reference on each lock *before* * we call the unregister code for releasing parent * directories. */ ip->ip_parent = igrab(parent); BUG_ON(!ip->ip_parent); break; case S_IFDIR: inode->i_op = &dlmfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == * 2 (for "." entry) */ inc_nlink(inode); break; } return inode; } /* * File creation. Allocate an inode, and we're done.. */ /* SMP-safe */ static int dlmfs_mkdir(struct mnt_idmap * idmap, struct inode * dir, struct dentry * dentry, umode_t mode) { int status; struct inode *inode = NULL; const struct qstr *domain = &dentry->d_name; struct dlmfs_inode_private *ip; struct ocfs2_cluster_connection *conn; mlog(0, "mkdir %.*s\n", domain->len, domain->name); /* verify that we have a proper domain */ if (domain->len >= GROUP_NAME_MAX) { status = -EINVAL; mlog(ML_ERROR, "invalid domain name for directory.\n"); goto bail; } inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); if (!inode) { status = -ENOMEM; mlog_errno(status); goto bail; } ip = DLMFS_I(inode); conn = user_dlm_register(domain); if (IS_ERR(conn)) { status = PTR_ERR(conn); mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", status, domain->len, domain->name); goto bail; } ip->ip_conn = conn; inc_nlink(dir); d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ status = 0; bail: if (status < 0) iput(inode); return status; } static int dlmfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { int status = 0; struct inode *inode; const struct qstr *name = &dentry->d_name; mlog(0, "create %.*s\n", name->len, name->name); /* verify name is valid and doesn't contain any dlm reserved * characters */ if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || name->name[0] == '$') { status = -EINVAL; mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, name->name); goto bail; } inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); if (!inode) { status = -ENOMEM; mlog_errno(status); goto bail; } d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ bail: return status; } static int dlmfs_unlink(struct inode *dir, struct dentry *dentry) { int status; struct inode 
*inode = d_inode(dentry); mlog(0, "unlink inode %lu\n", inode->i_ino); /* if there are no current holders, or none that are waiting * to acquire a lock, this basically destroys our lockres. */ status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); if (status < 0) { mlog(ML_ERROR, "unlink %pd, error %d from destroy\n", dentry, status); goto bail; } status = simple_unlink(dir, dentry); bail: return status; } static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc) { sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = DLMFS_MAGIC; sb->s_op = &dlmfs_ops; sb->s_root = d_make_root(dlmfs_get_root_inode(sb)); if (!sb->s_root) return -ENOMEM; return 0; } static const struct file_operations dlmfs_file_operations = { .open = dlmfs_file_open, .release = dlmfs_file_release, .poll = dlmfs_file_poll, .read = dlmfs_file_read, .write = dlmfs_file_write, .llseek = default_llseek, }; static const struct inode_operations dlmfs_dir_inode_operations = { .create = dlmfs_create, .lookup = simple_lookup, .unlink = dlmfs_unlink, }; /* this way we can restrict mkdir to only the toplevel of the fs. */ static const struct inode_operations dlmfs_root_inode_operations = { .lookup = simple_lookup, .mkdir = dlmfs_mkdir, .rmdir = simple_rmdir, }; static const struct super_operations dlmfs_ops = { .statfs = simple_statfs, .alloc_inode = dlmfs_alloc_inode, .free_inode = dlmfs_free_inode, .evict_inode = dlmfs_evict_inode, .drop_inode = generic_delete_inode, }; static const struct inode_operations dlmfs_file_inode_operations = { .getattr = simple_getattr, .setattr = dlmfs_file_setattr, }; static int dlmfs_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, dlmfs_fill_super); } static const struct fs_context_operations dlmfs_context_ops = { .get_tree = dlmfs_get_tree, }; static int dlmfs_init_fs_context(struct fs_context *fc) { fc->ops = &dlmfs_context_ops; return 0; } static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", .kill_sb = kill_litter_super, .init_fs_context = dlmfs_init_fs_context, }; MODULE_ALIAS_FS("ocfs2_dlmfs"); static int __init init_dlmfs_fs(void) { int status; int cleanup_inode = 0, cleanup_worker = 0; dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), dlmfs_init_once); if (!dlmfs_inode_cache) { status = -ENOMEM; goto bail; } cleanup_inode = 1; user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0); if (!user_dlm_worker) { status = -ENOMEM; goto bail; } cleanup_worker = 1; user_dlm_set_locking_protocol(); status = register_filesystem(&dlmfs_fs_type); bail: if (status) { if (cleanup_inode) kmem_cache_destroy(dlmfs_inode_cache); if (cleanup_worker) destroy_workqueue(user_dlm_worker); } else printk("OCFS2 User DLM kernel interface loaded\n"); return status; } static void __exit exit_dlmfs_fs(void) { unregister_filesystem(&dlmfs_fs_type); destroy_workqueue(user_dlm_worker); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); } MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OCFS2 DLM-Filesystem"); module_init(init_dlmfs_fs) module_exit(exit_dlmfs_fs) |
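/*
 * Illustrative userspace sketch (not part of dlmfs itself): how the
 * interface implemented above is typically exercised.  The mount point
 * "/dlm" and the domain/lock names are assumptions made only for this
 * example; the kernel side just requires that domain directories are
 * created at the top level (dlmfs_mkdir) and that lock names do not use
 * the reserved '$' prefix checked in dlmfs_create().
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

static int example_take_dlmfs_lock(void)
{
	char lvb[64];	/* at most DLM_LVB_LEN bytes are returned */
	ssize_t n;
	int fd;

	/* mkdir registers a cluster connection (see dlmfs_mkdir above). */
	if (mkdir("/dlm/mydomain", 0755) && errno != EEXIST)
		return -1;

	/*
	 * Opening the lock file acquires the DLM lock.  O_NONBLOCK turns
	 * the request into a trylock, so contention shows up as ETXTBSY
	 * (the DLM_LKF_NOQUEUE/-EAGAIN mapping in dlmfs_file_open()).
	 */
	fd = open("/dlm/mydomain/mylock", O_CREAT | O_RDWR | O_NONBLOCK, 0600);
	if (fd < 0)
		return -1;

	/* read()/write() transfer the lock value block (LVB). */
	n = read(fd, lvb, sizeof(lvb));
	if (n > 0)
		printf("read %zd LVB bytes\n", n);

	/* close() drops the lock again (dlmfs_file_release above). */
	close(fd);
	return 0;
}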
1 1 4 1 3 11 4 2 5 1 4 17 14 3 15 2 14 1 2 13 3 10 8 2 4 11 1 1 11 6 3 3 6 4 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <net/ip.h> #include <net/tcp.h> #include <net/netlink.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_synproxy.h> #include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_synproxy.h> struct nft_synproxy { struct nf_synproxy_info info; }; static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = { [NFTA_SYNPROXY_MSS] = { .type = NLA_U16 }, [NFTA_SYNPROXY_WSCALE] = { .type = NLA_U8 }, [NFTA_SYNPROXY_FLAGS] = { .type = NLA_U32 }, }; static void nft_synproxy_tcp_options(struct synproxy_options *opts, const struct tcphdr *tcp, struct synproxy_net *snet, struct nf_synproxy_info *info, const struct nft_synproxy *priv) { this_cpu_inc(snet->stats->syn_received); if (tcp->ece && tcp->cwr) opts->options |= NF_SYNPROXY_OPT_ECN; opts->options &= priv->info.options; opts->mss_encode = opts->mss_option; opts->mss_option = info->mss; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, opts); else opts->options &= ~(NF_SYNPROXY_OPT_WSCALE | NF_SYNPROXY_OPT_SACK_PERM | NF_SYNPROXY_OPT_ECN); } static void nft_synproxy_eval_v4(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; if (tcp->syn) { /* Initial SYN from client */ nft_synproxy_tcp_options(opts, tcp, snet, &info, priv); synproxy_send_client_synack(net, skb, tcp, opts); consume_skb(skb); regs->verdict.code = NF_STOLEN; } else if (tcp->ack) { /* ACK from client */ if (synproxy_recv_client_ack(net, skb, tcp, opts, ntohl(tcp->seq))) { consume_skb(skb); regs->verdict.code = NF_STOLEN; } else { regs->verdict.code = 
NF_DROP; } } } #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) static void nft_synproxy_eval_v6(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *skb = pkt->skb; if (tcp->syn) { /* Initial SYN from client */ nft_synproxy_tcp_options(opts, tcp, snet, &info, priv); synproxy_send_client_synack_ipv6(net, skb, tcp, opts); consume_skb(skb); regs->verdict.code = NF_STOLEN; } else if (tcp->ack) { /* ACK from client */ if (synproxy_recv_client_ack_ipv6(net, skb, tcp, opts, ntohl(tcp->seq))) { consume_skb(skb); regs->verdict.code = NF_STOLEN; } else { regs->verdict.code = NF_DROP; } } } #endif /* CONFIG_NF_TABLES_IPV6*/ static void nft_synproxy_do_eval(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct synproxy_options opts = {}; struct sk_buff *skb = pkt->skb; int thoff = nft_thoff(pkt); const struct tcphdr *tcp; struct tcphdr _tcph; if (pkt->tprot != IPPROTO_TCP) { regs->verdict.code = NFT_BREAK; return; } if (nf_ip_checksum(skb, nft_hook(pkt), thoff, IPPROTO_TCP)) { regs->verdict.code = NF_DROP; return; } tcp = skb_header_pointer(skb, thoff, sizeof(struct tcphdr), &_tcph); if (!tcp) { regs->verdict.code = NF_DROP; return; } if (!synproxy_parse_options(skb, thoff, tcp, &opts)) { regs->verdict.code = NF_DROP; return; } switch (skb->protocol) { case htons(ETH_P_IP): nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts); return; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case htons(ETH_P_IPV6): nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts); return; #endif } regs->verdict.code = NFT_BREAK; } static int nft_synproxy_do_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_synproxy *priv) { struct synproxy_net *snet = synproxy_pernet(ctx->net); u32 flags; int err; if (tb[NFTA_SYNPROXY_MSS]) priv->info.mss = ntohs(nla_get_be16(tb[NFTA_SYNPROXY_MSS])); if (tb[NFTA_SYNPROXY_WSCALE]) priv->info.wscale = nla_get_u8(tb[NFTA_SYNPROXY_WSCALE]); if (tb[NFTA_SYNPROXY_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_SYNPROXY_FLAGS])); if (flags & ~NF_SYNPROXY_OPT_MASK) return -EOPNOTSUPP; priv->info.options = flags; } err = nf_ct_netns_get(ctx->net, ctx->family); if (err) return err; switch (ctx->family) { case NFPROTO_IPV4: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: err = nf_synproxy_ipv6_init(snet, ctx->net); if (err) goto nf_ct_failure; break; #endif case NFPROTO_INET: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; err = nf_synproxy_ipv6_init(snet, ctx->net); if (err) { nf_synproxy_ipv4_fini(snet, ctx->net); goto nf_ct_failure; } break; } return 0; nf_ct_failure: nf_ct_netns_put(ctx->net, ctx->family); return err; } static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) { struct synproxy_net *snet = synproxy_pernet(ctx->net); switch (ctx->family) { case NFPROTO_IPV4: nf_synproxy_ipv4_fini(snet, ctx->net); break; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case NFPROTO_IPV6: nf_synproxy_ipv6_fini(snet, ctx->net); break; #endif case NFPROTO_INET: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; } nf_ct_netns_put(ctx->net, ctx->family); } static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv) { if 
(nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) || nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) || nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static void nft_synproxy_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_synproxy *priv = nft_expr_priv(expr); nft_synproxy_do_eval(priv, regs, pkt); } static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } static int nft_synproxy_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_synproxy *priv = nft_expr_priv(expr); return nft_synproxy_do_init(ctx, tb, priv); } static void nft_synproxy_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nft_synproxy_do_destroy(ctx); } static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_synproxy *priv = nft_expr_priv(expr); return nft_synproxy_do_dump(skb, priv); } static struct nft_expr_type nft_synproxy_type; static const struct nft_expr_ops nft_synproxy_ops = { .eval = nft_synproxy_eval, .size = NFT_EXPR_SIZE(sizeof(struct nft_synproxy)), .init = nft_synproxy_init, .destroy = nft_synproxy_destroy, .dump = nft_synproxy_dump, .type = &nft_synproxy_type, .validate = nft_synproxy_validate, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_synproxy_type __read_mostly = { .ops = &nft_synproxy_ops, .name = "synproxy", .owner = THIS_MODULE, .policy = nft_synproxy_policy, .maxattr = NFTA_SYNPROXY_MAX, }; static int nft_synproxy_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) { struct nft_synproxy *priv = nft_obj_data(obj); return nft_synproxy_do_init(ctx, tb, priv); } static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { nft_synproxy_do_destroy(ctx); } static int nft_synproxy_obj_dump(struct sk_buff *skb, struct nft_object *obj, bool reset) { struct nft_synproxy *priv = nft_obj_data(obj); return nft_synproxy_do_dump(skb, priv); } static void nft_synproxy_obj_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_synproxy *priv = nft_obj_data(obj); nft_synproxy_do_eval(priv, regs, pkt); } static void nft_synproxy_obj_update(struct nft_object *obj, struct nft_object *newobj) { struct nft_synproxy *newpriv = nft_obj_data(newobj); struct nft_synproxy *priv = nft_obj_data(obj); priv->info = newpriv->info; } static struct nft_object_type nft_synproxy_obj_type; static const struct nft_object_ops nft_synproxy_obj_ops = { .type = &nft_synproxy_obj_type, .size = sizeof(struct nft_synproxy), .init = nft_synproxy_obj_init, .destroy = nft_synproxy_obj_destroy, .dump = nft_synproxy_obj_dump, .eval = nft_synproxy_obj_eval, .update = nft_synproxy_obj_update, }; static struct nft_object_type nft_synproxy_obj_type __read_mostly = { .type = NFT_OBJECT_SYNPROXY, .ops = &nft_synproxy_obj_ops, .maxattr = NFTA_SYNPROXY_MAX, .policy = nft_synproxy_policy, .owner = THIS_MODULE, }; static int __init nft_synproxy_module_init(void) { int err; err = nft_register_obj(&nft_synproxy_obj_type); if (err < 0) return err; err = nft_register_expr(&nft_synproxy_type); if (err < 0) goto 
err; return 0; err: nft_unregister_obj(&nft_synproxy_obj_type); return err; } static void __exit nft_synproxy_module_exit(void) { nft_unregister_expr(&nft_synproxy_type); nft_unregister_obj(&nft_synproxy_obj_type); } module_init(nft_synproxy_module_init); module_exit(nft_synproxy_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("synproxy"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY); MODULE_DESCRIPTION("nftables SYNPROXY expression support"); |
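/*
 * Illustrative sketch (not part of this module): the userspace-visible
 * contract of the three attributes parsed by nft_synproxy_do_init() and
 * emitted by nft_synproxy_do_dump() above.  NFTA_SYNPROXY_MSS and
 * NFTA_SYNPROXY_FLAGS travel in network byte order, NFTA_SYNPROXY_WSCALE
 * is a plain u8, and flags outside NF_SYNPROXY_OPT_MASK are rejected with
 * -EOPNOTSUPP.  In nft(8) terms this backs rules along the lines of
 * "... tcp flags syn synproxy mss 1460 wscale 7 timestamp sack-perm"
 * (syntax quoted from memory, not from this file).  All names below
 * prefixed with "example_" are hypothetical, and the mask value is only
 * a stand-in for the kernel's NF_SYNPROXY_OPT_MASK.
 */
#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_SYNPROXY_OPT_MASK	0x1fU	/* assumed set of valid option bits */

struct example_synproxy_conf {
	uint16_t mss_be;	/* NFTA_SYNPROXY_MSS, big endian */
	uint8_t  wscale;	/* NFTA_SYNPROXY_WSCALE, host order */
	uint32_t flags_be;	/* NFTA_SYNPROXY_FLAGS, big endian */
};

/* Mirrors the flag check performed in nft_synproxy_do_init(). */
static bool example_synproxy_flags_valid(const struct example_synproxy_conf *c)
{
	return (ntohl(c->flags_be) & ~EXAMPLE_SYNPROXY_OPT_MASK) == 0;
}

/* Pack host-order values the way the kernel expects to decode them. */
static struct example_synproxy_conf example_synproxy_conf(uint16_t mss,
							  uint8_t wscale,
							  uint32_t flags)
{
	struct example_synproxy_conf c = {
		.mss_be   = htons(mss),
		.wscale   = wscale,
		.flags_be = htonl(flags),
	};

	return c;
}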
/* * Copyright (c) 2006, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
* */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/errqueue.h> #include "rds.h" static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { [RDS_EXTHDR_NONE] = 0, [RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version), [RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), [RDS_EXTHDR_NPATHS] = sizeof(u16), [RDS_EXTHDR_GEN_NUM] = sizeof(u32), }; void rds_message_addref(struct rds_message *rm) { rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); refcount_inc(&rm->m_refcount); } EXPORT_SYMBOL_GPL(rds_message_addref); static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie) { struct rds_zcopy_cookies *ck = &info->zcookies; int ncookies = ck->num; if (ncookies == RDS_MAX_ZCOOKIES) return false; ck->cookies[ncookies] = cookie; ck->num = ++ncookies; return true; } static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) { return container_of(znotif, struct rds_msg_zcopy_info, znotif); } void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q) { unsigned long flags; LIST_HEAD(copy); struct rds_msg_zcopy_info *info, *tmp; spin_lock_irqsave(&q->lock, flags); list_splice(&q->zcookie_head, ©); INIT_LIST_HEAD(&q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); list_for_each_entry_safe(info, tmp, ©, rs_zcookie_next) { list_del(&info->rs_zcookie_next); kfree(info); } } static void rds_rm_zerocopy_callback(struct rds_sock *rs, struct rds_znotifier *znotif) { struct rds_msg_zcopy_info *info; struct rds_msg_zcopy_queue *q; u32 cookie = znotif->z_cookie; struct rds_zcopy_cookies *ck; struct list_head *head; unsigned long flags; mm_unaccount_pinned_pages(&znotif->z_mmp); q = &rs->rs_zcookie_queue; spin_lock_irqsave(&q->lock, flags); head = &q->zcookie_head; if (!list_empty(head)) { info = list_first_entry(head, struct rds_msg_zcopy_info, rs_zcookie_next); if (rds_zcookie_add(info, cookie)) { spin_unlock_irqrestore(&q->lock, flags); kfree(rds_info_from_znotifier(znotif)); /* caller invokes rds_wake_sk_sleep() */ return; } } info = rds_info_from_znotifier(znotif); ck = &info->zcookies; memset(ck, 0, sizeof(*ck)); WARN_ON(!rds_zcookie_add(info, cookie)); list_add_tail(&info->rs_zcookie_next, &q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); /* caller invokes rds_wake_sk_sleep() */ } /* * This relies on dma_map_sg() not touching sg[].page during merging. 
*/ static void rds_message_purge(struct rds_message *rm) { unsigned long i, flags; bool zcopy = false; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; if (rm->data.op_mmp_znotifier) { zcopy = true; rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); rds_wake_sk_sleep(rs); rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; } spin_unlock_irqrestore(&rm->m_rs_lock, flags); for (i = 0; i < rm->data.op_nents; i++) { /* XXX will have to put_page for page refs */ if (!zcopy) __free_page(sg_page(&rm->data.op_sg[i])); else put_page(sg_page(&rm->data.op_sg[i])); } rm->data.op_nents = 0; if (rm->rdma.op_active) rds_rdma_free_op(&rm->rdma); if (rm->rdma.op_rdma_mr) kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final); if (rm->atomic.op_active) rds_atomic_free_op(&rm->atomic); if (rm->atomic.op_rdma_mr) kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final); } void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (refcount_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); rds_message_purge(rm); kfree(rm); } } EXPORT_SYMBOL_GPL(rds_message_put); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq) { hdr->h_flags = 0; hdr->h_sport = sport; hdr->h_dport = dport; hdr->h_sequence = cpu_to_be64(seq); hdr->h_exthdr[0] = RDS_EXTHDR_NONE; } EXPORT_SYMBOL_GPL(rds_message_populate_header); int rds_message_add_extension(struct rds_header *hdr, unsigned int type, const void *data, unsigned int len) { unsigned int ext_len = sizeof(u8) + len; unsigned char *dst; /* For now, refuse to add more than one extension header */ if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE) return 0; if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type]) return 0; if (ext_len >= RDS_HEADER_EXT_SPACE) return 0; dst = hdr->h_exthdr; *dst++ = type; memcpy(dst, data, len); dst[len] = RDS_EXTHDR_NONE; return 1; } EXPORT_SYMBOL_GPL(rds_message_add_extension); /* * If a message has extension headers, retrieve them here. * Call like this: * * unsigned int pos = 0; * * while (1) { * buflen = sizeof(buffer); * type = rds_message_next_extension(hdr, &pos, buffer, &buflen); * if (type == RDS_EXTHDR_NONE) * break; * ... * } */ int rds_message_next_extension(struct rds_header *hdr, unsigned int *pos, void *buf, unsigned int *buflen) { unsigned int offset, ext_type, ext_len; u8 *src = hdr->h_exthdr; offset = *pos; if (offset >= RDS_HEADER_EXT_SPACE) goto none; /* Get the extension type and length. For now, the * length is implied by the extension type. 
*/ ext_type = src[offset++]; if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX) goto none; ext_len = rds_exthdr_size[ext_type]; if (offset + ext_len > RDS_HEADER_EXT_SPACE) goto none; *pos = offset + ext_len; if (ext_len < *buflen) *buflen = ext_len; memcpy(buf, src + offset, *buflen); return ext_type; none: *pos = RDS_HEADER_EXT_SPACE; *buflen = 0; return RDS_EXTHDR_NONE; } int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset) { struct rds_ext_header_rdma_dest ext_hdr; ext_hdr.h_rdma_rkey = cpu_to_be32(r_key); ext_hdr.h_rdma_offset = cpu_to_be32(offset); return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr)); } EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); /* * Each rds_message is allocated with extra space for the scatterlist entries * rds ops will need. This is to minimize memory allocation count. Then, each rds op * can grab SGs when initializing its part of the rds_message. */ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) return NULL; rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; rm->m_used_sgs = 0; rm->m_total_sgs = extra_len / sizeof(struct scatterlist); refcount_set(&rm->m_refcount, 1); INIT_LIST_HEAD(&rm->m_sock_item); INIT_LIST_HEAD(&rm->m_conn_item); spin_lock_init(&rm->m_rs_lock); init_waitqueue_head(&rm->m_flush_wait); out: return rm; } /* * RDS ops use this to grab SG entries from the rm's sg pool. */ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) { struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; struct scatterlist *sg_ret; if (nents <= 0) { pr_warn("rds: alloc sgs failed! nents <= 0\n"); return ERR_PTR(-EINVAL); } if (rm->m_used_sgs + nents > rm->m_total_sgs) { pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n", rm->m_total_sgs, rm->m_used_sgs, nents); return ERR_PTR(-ENOMEM); } sg_ret = &sg_first[rm->m_used_sgs]; sg_init_table(sg_ret, nents); rm->m_used_sgs += nents; return sg_ret; } struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) { struct rds_message *rm; unsigned int i; int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); if (!rm) return ERR_PTR(-ENOMEM); set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); if (IS_ERR(rm->data.op_sg)) { void *err = ERR_CAST(rm->data.op_sg); rds_message_put(rm); return err; } for (i = 0; i < rm->data.op_nents; ++i) { sg_set_page(&rm->data.op_sg[i], virt_to_page((void *)page_addrs[i]), PAGE_SIZE, 0); } return rm; } static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) { struct scatterlist *sg; int ret = 0; int length = iov_iter_count(from); struct rds_msg_zcopy_info *info; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); /* * now allocate and copy in the data payload. 
*/ sg = rm->data.op_sg; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; INIT_LIST_HEAD(&info->rs_zcookie_next); rm->data.op_mmp_znotifier = &info->znotif; if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, length)) { ret = -ENOMEM; goto err; } while (iov_iter_count(from)) { struct page *pages; size_t start; ssize_t copied; copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE, 1, &start); if (copied < 0) { struct mmpin *mmp; int i; for (i = 0; i < rm->data.op_nents; i++) put_page(sg_page(&rm->data.op_sg[i])); mmp = &rm->data.op_mmp_znotifier->z_mmp; mm_unaccount_pinned_pages(mmp); ret = -EFAULT; goto err; } length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; sg++; } WARN_ON_ONCE(length != 0); return ret; err: kfree(info); rm->data.op_mmp_znotifier = NULL; return ret; } int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy) { unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); /* now allocate and copy in the data payload. */ sg = rm->data.op_sg; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ if (zcopy) return rds_message_zcopy_from_user(rm, from); while (iov_iter_count(from)) { if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), GFP_HIGHUSER); if (ret) return ret; rm->data.op_nents++; sg_off = 0; } to_copy = min_t(unsigned long, iov_iter_count(from), sg->length - sg_off); rds_stats_add(s_copy_from_user, to_copy); nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, to_copy, from); if (nbytes != to_copy) return -EFAULT; sg_off += to_copy; if (sg_off == sg->length) sg++; } return ret; } int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) { struct rds_message *rm; struct scatterlist *sg; unsigned long to_copy; unsigned long vec_off; int copied; int ret; u32 len; rm = container_of(inc, struct rds_message, m_inc); len = be32_to_cpu(rm->m_inc.i_hdr.h_len); sg = rm->data.op_sg; vec_off = 0; copied = 0; while (iov_iter_count(to) && copied < len) { to_copy = min_t(unsigned long, iov_iter_count(to), sg->length - vec_off); to_copy = min_t(unsigned long, to_copy, len - copied); rds_stats_add(s_copy_to_user, to_copy); ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off, to_copy, to); if (ret != to_copy) return -EFAULT; vec_off += to_copy; copied += to_copy; if (vec_off == sg->length) { vec_off = 0; sg++; } } return copied; } /* * If the message is still on the send queue, wait until the transport * is done with it. This is particularly important for RDMA operations. */ void rds_message_wait(struct rds_message *rm) { wait_event_interruptible(rm->m_flush_wait, !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); } void rds_message_unmapped(struct rds_message *rm) { clear_bit(RDS_MSG_MAPPED, &rm->m_flags); wake_up_interruptible(&rm->m_flush_wait); } EXPORT_SYMBOL_GPL(rds_message_unmapped); |
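/*
 * Illustrative sketch (not part of this file): walking every extension
 * header of a received rds_header with rds_message_next_extension(),
 * expanded from the usage pattern documented above that function.  The
 * helper name is example-only; it assumes the declarations from "rds.h"
 * just like the code above.
 */
static void example_walk_exthdrs(struct rds_header *hdr)
{
	unsigned char buffer[RDS_HEADER_EXT_SPACE];
	unsigned int pos = 0;
	unsigned int buflen;
	int type;

	while (1) {
		buflen = sizeof(buffer);
		type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
		if (type == RDS_EXTHDR_NONE)
			break;

		/*
		 * buflen now holds the copied payload length, i.e.
		 * min(rds_exthdr_size[type], sizeof(buffer)).
		 */
		pr_debug("extension header type %d, %u bytes\n", type, buflen);
	}
}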
1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 | // SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2022 Linutronix GmbH, John Ogness // Copyright (C) 2022 Intel, Thomas Gleixner #include <linux/atomic.h> #include <linux/bug.h> #include <linux/console.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/init.h> #include <linux/irqflags.h> #include <linux/kthread.h> #include <linux/minmax.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include "internal.h" #include "printk_ringbuffer.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. * * The state of the console is maintained in the "nbcon_state" atomic * variable. * * The console is locked when: * * - The 'prio' field contains the priority of the context that owns the * console. Only higher priority contexts are allowed to take over the * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. * * - The 'cpu' field denotes on which CPU the console is locked. It is used * to prevent busy waiting on the same CPU. Also it informs the lock owner * that it has lost the lock in a more complex scenario when the lock was * taken over by a higher priority context, released, and taken on another * CPU with the same priority as the interrupted owner. * * The acquire mechanism uses a few more fields: * * - The 'req_prio' field is used by the handover approach to make the * current owner aware that there is a context with a higher priority * waiting for the friendly handover. * * - The 'unsafe' field allows to take over the console in a safe way in the * middle of emitting a message. The field is set only when accessing some * shared resources or when the console device is manipulated. It can be * cleared, for example, after emitting one character when the console * device is in a consistent state. * * - The 'unsafe_takeover' field is set when a hostile takeover took the * console in an unsafe state. The console will stay in the unsafe state * until re-initialized. * * The acquire mechanism uses three approaches: * * 1) Direct acquire when the console is not owned or is owned by a lower * priority context and is in a safe state. * * 2) Friendly handover mechanism uses a request/grant handshake. It is used * when the current owner has lower priority and the console is in an * unsafe state. * * The requesting context: * * a) Sets its priority into the 'req_prio' field. * * b) Waits (with a timeout) for the owning context to unlock the * console. 
* * c) Takes the lock and clears the 'req_prio' field. * * The owning context: * * a) Observes the 'req_prio' field set on exit from the unsafe * console state. * * b) Gives up console ownership by clearing the 'prio' field. * * 3) Unsafe hostile takeover allows to take over the lock even when the * console is an unsafe state. It is used only in panic() by the final * attempt to flush consoles in a try and hope mode. * * Note that separate record buffers are used in panic(). As a result, * the messages can be read and formatted without any risk even after * using the hostile takeover in unsafe state. * * The release function simply clears the 'prio' field. * * All operations on @console::nbcon_state are atomic cmpxchg based to * handle concurrency. * * The acquire/release functions implement only minimal policies: * * - Preference for higher priority contexts. * - Protection of the panic CPU. * * All other policy decisions must be made at the call sites: * * - What is marked as an unsafe section. * - Whether to spin-wait if there is already an owner and the console is * in an unsafe state. * - Whether to attempt an unsafe hostile takeover. * * The design allows to implement the well known: * * acquire() * output_one_printk_record() * release() * * The output of one printk record might be interrupted with a higher priority * context. The new owner is supposed to reprint the entire interrupted record * from scratch. */ /** * nbcon_state_set - Helper function to set the console state * @con: Console to update * @new: The new state to write * * Only to be used when the console is not yet or no longer visible in the * system. Otherwise use nbcon_state_try_cmpxchg(). */ static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) { atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); } /** * nbcon_state_read - Helper function to read the console state * @con: Console to read * @state: The state to store the result */ static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) { state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); } /** * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state * @con: Console to update * @cur: Old/expected state * @new: New state * * Return: True on success. False on fail and @cur is updated. */ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, struct nbcon_state *new) { return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } /** * nbcon_seq_read - Read the current console sequence * @con: Console to read the sequence of * * Return: Sequence number of the next record to print on @con. */ u64 nbcon_seq_read(struct console *con) { unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); return __ulseq_to_u64seq(prb, nbcon_seq); } /** * nbcon_seq_force - Force console sequence to a specific value * @con: Console to work on * @seq: Sequence number value to set * * Only to be used during init (before registration) or in extreme situations * (such as panic with CONSOLE_REPLAY_ALL). */ void nbcon_seq_force(struct console *con, u64 seq) { /* * If the specified record no longer exists, the oldest available record * is chosen. This is especially important on 32bit systems because only * the lower 32 bits of the sequence number are stored. The upper 32 bits * are derived from the sequence numbers available in the ringbuffer. 
*/ u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); } /** * nbcon_seq_try_update - Try to update the console sequence number * @ctxt: Pointer to an acquire context that contains * all information about the acquire mode * @new_seq: The new sequence number to set * * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to * the 64bit value). This could be a different value than @new_seq if * nbcon_seq_force() was used or the current context no longer owns the * console. In the later case, it will stop printing anyway. */ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); struct console *con = ctxt->console; if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, __u64seq_to_ulseq(new_seq))) { ctxt->seq = new_seq; } else { ctxt->seq = nbcon_seq_read(con); } } /** * nbcon_context_try_acquire_direct - Try to acquire directly * @ctxt: The context of the caller * @cur: The current console state * * Acquire the console when it is released. Also acquire the console when * the current owner has a lower priority and the console is in a safe state. * * Return: 0 on success. Otherwise, an error code on failure. Also @cur * is updated to the latest state when failed to modify it. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU. * Or the current owner or waiter has the same or higher * priority. No acquire method can be successful in * this case. * * -EBUSY: The current owner has a lower priority but the console * in an unsafe state. The caller should try using * the handover acquire method. */ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; do { /* * Panic does not imply that the console is owned. However, it * is critical that non-panic CPUs during panic are unable to * acquire ownership in order to satisfy the assumptions of * nbcon_waiter_matches(). In particular, the assumption that * lower priorities are ignored during panic. */ if (other_cpu_in_panic()) return -EPERM; if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) return -EPERM; if (cur->unsafe) return -EBUSY; /* * The console should never be safe for a direct acquire * if an unsafe hostile takeover has ever happened. */ WARN_ON_ONCE(cur->unsafe_takeover); new.atom = cur->atom; new.prio = ctxt->prio; new.req_prio = NBCON_PRIO_NONE; new.unsafe = cur->unsafe_takeover; new.cpu = cpu; } while (!nbcon_state_try_cmpxchg(con, cur, &new)); return 0; } static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) { /* * The request context is well defined by the @req_prio because: * * - Only a context with a priority higher than the owner can become * a waiter. * - Only a context with a priority higher than the waiter can * directly take over the request. * - There are only three priorities. * - Only one CPU is allowed to request PANIC priority. * - Lower priorities are ignored during panic() until reboot. * * As a result, the following scenario is *not* possible: * * 1. This context is currently a waiter. * 2. Another context with a higher priority than this context * directly takes ownership. * 3. The higher priority context releases the ownership. * 4. Another lower priority context takes the ownership. * 5. 
Another context with the same priority as this context * creates a request and starts waiting. * * Event #1 implies this context is EMERGENCY. * Event #2 implies the new context is PANIC. * Event #3 occurs when panic() has flushed the console. * Events #4 and #5 are not possible due to the other_cpu_in_panic() * check in nbcon_context_try_acquire_direct(). */ return (cur->req_prio == expected_prio); } /** * nbcon_context_try_acquire_requested - Try to acquire after having * requested a handover * @ctxt: The context of the caller * @cur: The current console state * * This is a helper function for nbcon_context_try_acquire_handover(). * It is called when the console is in an unsafe state. The current * owner will release the console on exit from the unsafe region. * * Return: 0 on success and @cur is updated to the new console state. * Otherwise an error code on failure. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU * or this context is no longer the waiter. * * -EBUSY: The console is still locked. The caller should * continue waiting. * * Note: The caller must still remove the request when an error has occurred * except when this context is no longer the waiter. */ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; /* Note that the caller must still remove the request! */ if (other_cpu_in_panic()) return -EPERM; /* * Note that the waiter will also change if there was an unsafe * hostile takeover. */ if (!nbcon_waiter_matches(cur, ctxt->prio)) return -EPERM; /* If still locked, caller should continue waiting. */ if (cur->prio != NBCON_PRIO_NONE) return -EBUSY; /* * The previous owner should have never released ownership * in an unsafe region. */ WARN_ON_ONCE(cur->unsafe); new.atom = cur->atom; new.prio = ctxt->prio; new.req_prio = NBCON_PRIO_NONE; new.unsafe = cur->unsafe_takeover; new.cpu = cpu; if (!nbcon_state_try_cmpxchg(con, cur, &new)) { /* * The acquire could fail only when it has been taken * over by a higher priority context. */ WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); return -EPERM; } /* Handover success. This context now owns the console. */ return 0; } /** * nbcon_context_try_acquire_handover - Try to acquire via handover * @ctxt: The context of the caller * @cur: The current console state * * The function must be called only when the context has higher priority * than the current owner and the console is in an unsafe state. * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. * * The function sets "req_prio" field to make the current owner aware of * the request. Then it waits until the current owner releases the console, * or an even higher context takes over the request, or timeout expires. * * The current owner checks the "req_prio" field on exit from the unsafe * region and releases the console. It does not touch the "req_prio" field * so that the console stays reserved for the waiter. * * Return: 0 on success. Otherwise, an error code on failure. Also @cur * is updated to the latest state when failed to modify it. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU. * Or a higher priority context has taken over the * console or the handover request. * * -EBUSY: The current owner is on the same CPU so that the hand * shake could not work. Or the current owner is not * willing to wait (zero timeout). 
Or the console does * not enter the safe state before timeout passed. The * caller might still use the unsafe hostile takeover * when allowed. * * -EAGAIN: @cur has changed when creating the handover request. * The caller should retry with direct acquire. */ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; int timeout; int request_err = -EBUSY; /* * Check that the handover is called when the direct acquire failed * with -EBUSY. */ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(!cur->unsafe); /* Handover is not possible on the same CPU. */ if (cur->cpu == cpu) return -EBUSY; /* * Console stays unsafe after an unsafe takeover until re-initialized. * Waiting is not going to help in this case. */ if (cur->unsafe_takeover) return -EBUSY; /* Is the caller willing to wait? */ if (ctxt->spinwait_max_us == 0) return -EBUSY; /* * Setup a request for the handover. The caller should try to acquire * the console directly when the current state has been modified. */ new.atom = cur->atom; new.req_prio = ctxt->prio; if (!nbcon_state_try_cmpxchg(con, cur, &new)) return -EAGAIN; cur->atom = new.atom; /* Wait until there is no owner and then acquire the console. */ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { /* On successful acquire, this request is cleared. */ request_err = nbcon_context_try_acquire_requested(ctxt, cur); if (!request_err) return 0; /* * If the acquire should be aborted, it must be ensured * that the request is removed before returning to caller. */ if (request_err == -EPERM) break; udelay(1); /* Re-read the state because some time has passed. */ nbcon_state_read(con, cur); } /* Timed out or aborted. Carefully remove handover request. */ do { /* * No need to remove request if there is a new waiter. This * can only happen if a higher priority context has taken over * the console or the handover request. */ if (!nbcon_waiter_matches(cur, ctxt->prio)) return -EPERM; /* Unset request for handover. */ new.atom = cur->atom; new.req_prio = NBCON_PRIO_NONE; if (nbcon_state_try_cmpxchg(con, cur, &new)) { /* * Request successfully unset. Report failure of * acquiring via handover. */ cur->atom = new.atom; return request_err; } /* * Unable to remove request. Try to acquire in case * the owner has released the lock. */ } while (nbcon_context_try_acquire_requested(ctxt, cur)); /* Lucky timing. The acquire succeeded while removing the request. */ return 0; } /** * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover * @ctxt: The context of the caller * @cur: The current console state * * Acquire the console even in the unsafe state. * * It can be permitted by setting the 'allow_unsafe_takeover' field only * by the final attempt to flush messages in panic(). * * Return: 0 on success. -EPERM when not allowed by the context. */ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; if (!ctxt->allow_unsafe_takeover) return -EPERM; /* Ensure caller is allowed to perform unsafe hostile takeovers. */ if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) return -EPERM; /* * Check that try_acquire_direct() and try_acquire_handover() returned * -EBUSY in the right situation. 
*/ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(cur->unsafe != true); do { new.atom = cur->atom; new.cpu = cpu; new.prio = ctxt->prio; new.unsafe |= cur->unsafe_takeover; new.unsafe_takeover |= cur->unsafe; } while (!nbcon_state_try_cmpxchg(con, cur, &new)); return 0; } static struct printk_buffers panic_nbcon_pbufs; /** * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller * * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. * * If the caller allowed an unsafe hostile takeover, on success the * caller should check the current console state to see if it is * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; int err; nbcon_state_read(con, &cur); try_again: err = nbcon_context_try_acquire_direct(ctxt, &cur); if (err != -EBUSY) goto out; err = nbcon_context_try_acquire_handover(ctxt, &cur); if (err == -EAGAIN) goto try_again; if (err != -EBUSY) goto out; err = nbcon_context_try_acquire_hostile(ctxt, &cur); out: if (err) return false; /* Acquire succeeded. */ /* Assign the appropriate buffer for this context. */ if (atomic_read(&panic_cpu) == cpu) ctxt->pbufs = &panic_nbcon_pbufs; else ctxt->pbufs = con->pbufs; /* Set the record sequence for this context to print. */ ctxt->seq = nbcon_seq_read(ctxt->console); return true; } static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, int expected_prio) { /* * A similar function, nbcon_waiter_matches(), only deals with * EMERGENCY and PANIC priorities. However, this function must also * deal with the NORMAL priority, which requires additional checks * and constraints. * * For the case where preemption and interrupts are disabled, it is * enough to also verify that the owning CPU has not changed. * * For the case where preemption or interrupts are enabled, an * external synchronization method *must* be used. In particular, * the driver-specific locking mechanism used in device_lock() * (including disabling migration) should be used. It prevents * scenarios such as: * * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and * is scheduled out. * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY * and releases it. * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X] * and is scheduled out. * 4. [Task A] gets running on [CPU X] and sees that the console is * still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus * [Task A] thinks it is the owner when it is not. */ if (cur->prio != expected_prio) return false; if (cur->cpu != expected_cpu) return false; return true; } /** * nbcon_context_release - Release the console * @ctxt: The nbcon context from nbcon_context_try_acquire() */ static void nbcon_context_release(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; struct nbcon_state new; nbcon_state_read(con, &cur); do { if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) break; new.atom = cur.atom; new.prio = NBCON_PRIO_NONE; /* * If @unsafe_takeover is set, it is kept set so that * the state remains permanently unsafe. 
*/ new.unsafe |= cur.unsafe_takeover; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); ctxt->pbufs = NULL; } /** * nbcon_context_can_proceed - Check whether ownership can proceed * @ctxt: The nbcon context from nbcon_context_try_acquire() * @cur: The current console state * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * Must be invoked when entering the unsafe state to make sure that it still * owns the lock. Also must be invoked when exiting the unsafe context * to eventually free the lock for a higher priority context which asked * for the friendly handover. * * It can be called inside an unsafe section when the console is just * temporary in safe state instead of exiting and entering the unsafe * state. * * Also it can be called in the safe context before doing an expensive * safe operation. It does not make sense to do the operation when * a higher priority context took the lock. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); /* Make sure this context still owns the console. */ if (!nbcon_owner_matches(cur, cpu, ctxt->prio)) return false; /* The console owner can proceed if there is no waiter. */ if (cur->req_prio == NBCON_PRIO_NONE) return true; /* * A console owner within an unsafe region is always allowed to * proceed, even if there are waiters. It can perform a handover * when exiting the unsafe region. Otherwise the waiter will * need to perform an unsafe hostile takeover. */ if (cur->unsafe) return true; /* Waiters always have higher priorities than owners. */ WARN_ON_ONCE(cur->req_prio <= cur->prio); /* * Having a safe point for take over and eventually a few * duplicated characters or a full line is way better than a * hostile takeover. Post processing can take care of the garbage. * Release and hand over. */ nbcon_context_release(ctxt); /* * It is not clear whether the waiter really took over ownership. The * outermost callsite must make the final decision whether console * ownership is needed for it to proceed. If yes, it must reacquire * ownership (possibly hostile) before carefully proceeding. * * The calling context no longer owns the console so go back all the * way instead of trying to implement reacquire heuristics in tons of * places. */ return false; } /** * nbcon_can_proceed - Check whether ownership can proceed * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * It is used in nbcon_enter_unsafe() to make sure that it still owns the * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock * for a higher priority context which asked for the friendly handover. * * It can be called inside an unsafe section when the console is just * temporary in safe state instead of exiting and entering the unsafe state. * * Also it can be called in the safe context before doing an expensive safe * operation. It does not make sense to do the operation when a higher * priority context took the lock. 
* * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; struct nbcon_state cur; nbcon_state_read(con, &cur); return nbcon_context_can_proceed(ctxt, &cur); } EXPORT_SYMBOL_GPL(nbcon_can_proceed); #define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) #define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) /** * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state * @ctxt: The nbcon context from nbcon_context_try_acquire() * @unsafe: The new value for the unsafe bit * * Return: True if the unsafe state was updated and this context still * owns the console. Otherwise false if ownership was handed * over or taken. * * This function allows console owners to modify the unsafe status of the * console. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. * * Internal helper to avoid duplicated code. */ static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) { struct console *con = ctxt->console; struct nbcon_state cur; struct nbcon_state new; nbcon_state_read(con, &cur); do { /* * The unsafe bit must not be cleared if an * unsafe hostile takeover has occurred. */ if (!unsafe && cur.unsafe_takeover) goto out; if (!nbcon_context_can_proceed(ctxt, &cur)) return false; new.atom = cur.atom; new.unsafe = unsafe; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); cur.atom = new.atom; out: return nbcon_context_can_proceed(ctxt, &cur); } static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, char *buf, unsigned int len) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; struct nbcon_state cur; wctxt->outbuf = buf; wctxt->len = len; nbcon_state_read(con, &cur); wctxt->unsafe_takeover = cur.unsafe_takeover; } /** * nbcon_enter_unsafe - Enter an unsafe region in the driver * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); bool is_owner; is_owner = nbcon_context_enter_unsafe(ctxt); if (!is_owner) nbcon_write_context_set_buf(wctxt, NULL, 0); return is_owner; } EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); /** * nbcon_exit_unsafe - Exit an unsafe region in the driver * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. 
* * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); bool ret; ret = nbcon_context_exit_unsafe(ctxt); if (!ret) nbcon_write_context_set_buf(wctxt, NULL, 0); return ret; } EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); /** * nbcon_reacquire_nobuf - Reacquire a console after losing ownership * while printing * @wctxt: The write context that was handed to the write callback * * Since ownership can be lost at any time due to handover or takeover, a * printing context _must_ be prepared to back out immediately and * carefully. However, there are scenarios where the printing context must * reacquire ownership in order to finalize or revert hardware changes. * * This function allows a printing context to reacquire ownership using the * same priority as its previous ownership. * * Note that after a successful reacquire the printing context will have no * output buffer because that has been lost. This function cannot be used to * resume printing. */ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); while (!nbcon_context_try_acquire(ctxt)) cpu_relax(); nbcon_write_context_set_buf(wctxt, NULL, 0); } EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function * @use_atomic: True if the write_atomic() callback is to be used * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. If the caller * wants to do more it must reacquire the console first. * * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; struct printk_message pmsg = { .pbufs = ctxt->pbufs, }; unsigned long con_dropped; struct nbcon_state cur; unsigned long dropped; unsigned long ulseq; /* * This function should never be called for consoles that have not * implemented the necessary callback for writing: i.e. legacy * consoles and, when atomic, nbcon consoles with no write_atomic(). * Handle it as if ownership was lost and try to continue. * * Note that for nbcon consoles the write_thread() callback is * mandatory and was already checked in nbcon_alloc(). */ if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || !(console_srcu_read_flags(con) & CON_NBCON))) { nbcon_context_release(ctxt); return false; } /* * The printk buffers are filled within an unsafe section. This * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from * clobbering each other. 
*/ if (!nbcon_context_enter_unsafe(ctxt)) return false; ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); if (!ctxt->backlog) return nbcon_context_exit_unsafe(ctxt); /* * @con->dropped is not protected in case of an unsafe hostile * takeover. In that situation the update can be racy so * annotate it accordingly. */ con_dropped = data_race(READ_ONCE(con->dropped)); dropped = con_dropped + pmsg.dropped; if (dropped && !is_extended) console_prepend_dropped(&pmsg, dropped); /* * If the previous owner was assigned the same record, this context * has taken over ownership and is replaying the record. Prepend a * message to let the user know the record is replayed. */ ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq)); if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) { console_prepend_replay(&pmsg); } else { /* * Ensure this context is still the owner before trying to * update @nbcon_prev_seq. Otherwise the value in @ulseq may * not be from the previous owner and instead be some later * value from the context that took over ownership. */ nbcon_state_read(con, &cur); if (!nbcon_context_can_proceed(ctxt, &cur)) return false; atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq, __u64seq_to_ulseq(pmsg.seq)); } if (!nbcon_context_exit_unsafe(ctxt)) return false; /* For skipped records just update seq/dropped in @con. */ if (pmsg.outbuf_len == 0) goto update_con; /* Initialize the write context for driver callbacks. */ nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); if (use_atomic) con->write_atomic(con, wctxt); else con->write_thread(con, wctxt); if (!wctxt->outbuf) { /* * Ownership was lost and reacquired by the driver. Handle it * as if ownership was lost. */ nbcon_context_release(ctxt); return false; } /* * Ownership may have been lost but _not_ reacquired by the driver. * This case is detected and handled when entering unsafe to update * dropped/seq values. */ /* * Since any dropped message was successfully output, reset the * dropped count for the console. */ dropped = 0; update_con: /* * The dropped count and the sequence number are updated within an * unsafe section. This limits update races to the panic context and * allows the panic context to win. */ if (!nbcon_context_enter_unsafe(ctxt)) return false; if (dropped != con_dropped) { /* Counterpart to the READ_ONCE() above. */ WRITE_ONCE(con->dropped, dropped); } nbcon_seq_try_update(ctxt, pmsg.seq + 1); return nbcon_context_exit_unsafe(ctxt); } /* * nbcon_emit_one - Print one record for an nbcon console using the * specified callback * @wctxt: An initialized write context struct to use for this context * @use_atomic: True if the write_atomic() callback is to be used * * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. * * False, when there is no pending record, or when the console * context cannot be acquired, or the ownership has been lost. * The caller should give up. Either the job is done, cannot be * done, or will be handled by the owning context. * * This is an internal helper to handle the locking of the console before * calling nbcon_emit_next_record(). 
*/ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; unsigned long flags; bool ret = false; if (!use_atomic) { con->device_lock(con, &flags); /* * Ensure this stays on the CPU to make handover and * takeover possible. */ cant_migrate(); } if (!nbcon_context_try_acquire(ctxt)) goto out; /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no * longer valid. * * The higher priority printing context takes over responsibility * to print the pending records. */ if (!nbcon_emit_next_record(wctxt, use_atomic)) goto out; nbcon_context_release(ctxt); ret = ctxt->backlog; out: if (!use_atomic) con->device_unlock(con, flags); return ret; } /** * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup * @con: Console to operate on * @ctxt: The nbcon context from nbcon_context_try_acquire() * * Return: True if the thread should shutdown or if the console is * allowed to print and a record is available. False otherwise. * * After the thread wakes up, it must first check if it should shutdown before * attempting any printing. */ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt) { bool ret = false; short flags; int cookie; if (kthread_should_stop()) return true; cookie = console_srcu_read_lock(); flags = console_srcu_read_flags(con); if (console_is_usable(con, flags, false)) { /* Bring the sequence in @ctxt up to date */ ctxt->seq = nbcon_seq_read(con); ret = prb_read_valid(prb, ctxt->seq, NULL); } console_srcu_read_unlock(cookie); return ret; } /** * nbcon_kthread_func - The printer thread function * @__console: Console to operate on * * Return: 0 */ static int nbcon_kthread_func(void *__console) { struct console *con = __console; struct nbcon_write_context wctxt = { .ctxt.console = con, .ctxt.prio = NBCON_PRIO_NORMAL, }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); short con_flags; bool backlog; int cookie; wait_for_event: /* * Guarantee this task is visible on the rcuwait before * checking the wake condition. * * The full memory barrier within set_current_state() of * ___rcuwait_wait_event() pairs with the full memory * barrier within rcuwait_has_sleeper(). * * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A. */ rcuwait_wait_event(&con->rcuwait, nbcon_kthread_should_wakeup(con, ctxt), TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */ do { if (kthread_should_stop()) return 0; backlog = false; /* * Keep the srcu read lock around the entire operation so that * synchronize_srcu() can guarantee that the kthread stopped * or suspended printing. */ cookie = console_srcu_read_lock(); con_flags = console_srcu_read_flags(con); if (console_is_usable(con, con_flags, false)) backlog = nbcon_emit_one(&wctxt, false); console_srcu_read_unlock(cookie); cond_resched(); } while (backlog); goto wait_for_event; } /** * nbcon_irq_work - irq work to wake console printer thread * @irq_work: The irq work to operate on */ static void nbcon_irq_work(struct irq_work *irq_work) { struct console *con = container_of(irq_work, struct console, irq_work); nbcon_kthread_wake(con); } static inline bool rcuwait_has_sleeper(struct rcuwait *w) { /* * Guarantee any new records can be seen by tasks preparing to wait * before this context checks if the rcuwait is empty. 
* * This full memory barrier pairs with the full memory barrier within * set_current_state() of ___rcuwait_wait_event(), which is called * after prepare_to_rcuwait() adds the waiter but before it has * checked the wait condition. * * This pairs with nbcon_kthread_func:A. */ smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ return rcuwait_active(w); } /** * nbcon_kthreads_wake - Wake up printing threads using irq_work */ void nbcon_kthreads_wake(void) { struct console *con; int cookie; if (!printk_kthreads_running) return; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { if (!(console_srcu_read_flags(con) & CON_NBCON)) continue; /* * Only schedule irq_work if the printing thread is * actively waiting. If not waiting, the thread will * notice by itself that it has work to do. */ if (rcuwait_has_sleeper(&con->rcuwait)) irq_work_queue(&con->irq_work); } console_srcu_read_unlock(cookie); } /* * nbcon_kthread_stop - Stop a console printer thread * @con: Console to operate on */ void nbcon_kthread_stop(struct console *con) { lockdep_assert_console_list_lock_held(); if (!con->kthread) return; kthread_stop(con->kthread); con->kthread = NULL; } /** * nbcon_kthread_create - Create a console printer thread * @con: Console to operate on * * Return: True if the kthread was started or already exists. * Otherwise false and @con must not be registered. * * This function is called when it will be expected that nbcon consoles are * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL * will be no longer flushed by the legacy loop. This is why failure must * be fatal for console registration. * * If @con was already registered and this function fails, @con must be * unregistered before the global state variable @printk_kthreads_running * can be set. */ bool nbcon_kthread_create(struct console *con) { struct task_struct *kt; lockdep_assert_console_list_lock_held(); if (con->kthread) return true; kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); if (WARN_ON(IS_ERR(kt))) { con_printk(KERN_ERR, con, "failed to start printing thread\n"); return false; } con->kthread = kt; /* * It is important that console printing threads are scheduled * shortly after a printk call and with generous runtime budgets. */ sched_set_normal(con->kthread, -20); return true; } /* Track the nbcon emergency nesting per CPU. */ static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; /** * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer * * Context: For reading, any context. For writing, any context which could * not be migrated to another CPU. * Return: Either a pointer to the per CPU emergency nesting counter of * the current CPU or to the init data during early boot. * * The function is safe for reading per-CPU variables in any context because * preemption is disabled if the current CPU is in the emergency state. See * also nbcon_cpu_emergency_enter(). */ static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) { /* * The value of __printk_percpu_data_ready gets set in normal * context and before SMP initialization. As a result it could * never change while inside an nbcon emergency section. */ if (!printk_percpu_data_ready()) return &early_nbcon_pcpu_emergency_nesting; return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting); } /** * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon * printing on the current CPU * * Context: Any context. 
* Return: The nbcon_prio to use for acquiring an nbcon console in this * context for printing. * * The function is safe for reading per-CPU data in any context because * preemption is disabled if the current CPU is in the emergency or panic * state. */ enum nbcon_prio nbcon_get_default_prio(void) { unsigned int *cpu_emergency_nesting; if (this_cpu_in_panic()) return NBCON_PRIO_PANIC; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); if (*cpu_emergency_nesting) return NBCON_PRIO_EMERGENCY; return NBCON_PRIO_NORMAL; } /** * nbcon_legacy_emit_next_record - Print one record for an nbcon console * in legacy contexts * @con: The console to print on * @handover: Will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding * both the console_lock and the SRCU read lock. Otherwise it * is set to false. * @cookie: The cookie from the SRCU read lock. * @use_atomic: Set true when called in an atomic or unknown context. * It affects which nbcon callback will be used: write_atomic() * or write_thread(). * * When false, the write_thread() callback is used and would be * called in a preemtible context unless disabled by the * device_lock. The legacy handover is not allowed in this mode. * * Context: Any context except NMI. * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. * * False, when there is no pending record, or when the console * context cannot be acquired, or the ownership has been lost. * The caller should give up. Either the job is done, cannot be * done, or will be handled by the owning context. * * This function is meant to be called by console_flush_all() to print records * on nbcon consoles from legacy context (printing via console unlocking). * Essentially it is the nbcon version of console_emit_next_record(). */ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); unsigned long flags; bool progress; ctxt->console = con; ctxt->prio = nbcon_get_default_prio(); if (use_atomic) { /* * In an atomic or unknown context, use the same procedure as * in console_emit_next_record(). It allows to handover. */ printk_safe_enter_irqsave(flags); console_lock_spinning_enable(); stop_critical_timings(); } progress = nbcon_emit_one(&wctxt, use_atomic); if (use_atomic) { start_critical_timings(); *handover = console_lock_spinning_disable_and_check(cookie); printk_safe_exit_irqrestore(flags); } else { /* Non-atomic does not perform legacy spinning handovers. */ *handover = false; } return progress; } /** * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on * failure. * * Errors: * * -EPERM: Unable to acquire console ownership. * * -EAGAIN: Another context took over ownership while printing. * * -ENOENT: A record before @stop_seq is not available. * * If flushing up to @stop_seq was not successful, it only makes sense for the * caller to try again when -EAGAIN was returned. When -EPERM is returned, * this context is not allowed to acquire the console. When -ENOENT is * returned, it cannot be expected that the unfinalized record will become * available. 
*/ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, bool allow_unsafe_takeover) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); int err = 0; ctxt->console = con; ctxt->spinwait_max_us = 2000; ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; if (!nbcon_context_try_acquire(ctxt)) return -EPERM; while (nbcon_seq_read(con) < stop_seq) { /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no * longer valid. */ if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; if (!ctxt->backlog) { /* Are there reserved but not yet finalized records? */ if (nbcon_seq_read(con) < stop_seq) err = -ENOENT; break; } } nbcon_context_release(ctxt); return err; } /** * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * This will stop flushing before @stop_seq if another context has ownership. * That context is then responsible for the flushing. Likewise, if new records * are added while this context was flushing and there is no other context * to handle the printing, this context must also flush those records. */ static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, bool allow_unsafe_takeover) { struct console_flush_type ft; unsigned long flags; int err; again: /* * Atomic flushing does not use console driver synchronization (i.e. * it does not hold the port lock for uart consoles). Therefore IRQs * must be disabled to avoid being interrupted and then calling into * a driver that will deadlock trying to acquire console ownership. */ local_irq_save(flags); err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); local_irq_restore(flags); /* * If there was a new owner (-EPERM, -EAGAIN), that context is * responsible for completing. * * Do not wait for records not yet finalized (-ENOENT) to avoid a * possible deadlock. They will either get flushed by the writer or * eventually skipped on panic CPU. */ if (err) return; /* * If flushing was successful but more records are available, this * context must flush those remaining records if the printer thread * is not available do it. */ printk_get_console_flush_type(&ft); if (!ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { stop_seq = prb_next_reserve_seq(prb); goto again; } } /** * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers */ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover) { struct console *con; int cookie; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { short flags = console_srcu_read_flags(con); if (!(flags & CON_NBCON)) continue; if (!console_is_usable(con, flags, true)) continue; if (nbcon_seq_read(con) >= stop_seq) continue; nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); } console_srcu_read_unlock(cookie); } /** * nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * * Flush the backlog up through the currently newest record. Any new * records added while flushing will not be flushed if there is another * context available to handle the flushing. 
This is to avoid one CPU * printing unbounded because other CPUs continue to add records. */ void nbcon_atomic_flush_pending(void) { __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false); } /** * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their * write_atomic() callback and allowing unsafe hostile takeovers * * Flush the backlog up through the currently newest record. Unsafe hostile * takeovers will be performed, if necessary. */ void nbcon_atomic_flush_unsafe(void) { __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); } /** * nbcon_cpu_emergency_enter - Enter an emergency section where printk() * messages for that CPU are flushed directly * * Context: Any context. Disables preemption. * * When within an emergency section, printk() calls will attempt to flush any * pending messages in the ringbuffer. */ void nbcon_cpu_emergency_enter(void) { unsigned int *cpu_emergency_nesting; preempt_disable(); cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); (*cpu_emergency_nesting)++; } /** * nbcon_cpu_emergency_exit - Exit an emergency section * * Context: Within an emergency section. Enables preemption. */ void nbcon_cpu_emergency_exit(void) { unsigned int *cpu_emergency_nesting; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) (*cpu_emergency_nesting)--; preempt_enable(); } /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize * * Return: True if the console was fully allocated and initialized. * Otherwise @con must not be registered. * * When allocation and init was successful, the console must be properly * freed using nbcon_free() once it is no longer needed. */ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; /* The write_thread() callback is mandatory. */ if (WARN_ON(!con->write_thread)) return false; rcuwait_init(&con->rcuwait); init_irq_work(&con->irq_work, nbcon_irq_work); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); /* * Initialize @nbcon_seq to the highest possible sequence number so * that practically speaking it will have nothing to print until a * desired initial sequence number has been set via nbcon_seq_force(). */ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); if (con->flags & CON_BOOT) { /* * Boot console printing is synchronized with legacy console * printing, so boot consoles can share the same global printk * buffers. */ con->pbufs = &printk_shared_pbufs; } else { con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); if (!con->pbufs) { con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); return false; } if (printk_kthreads_running) { if (!nbcon_kthread_create(con)) { kfree(con->pbufs); con->pbufs = NULL; return false; } } } return true; } /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; if (printk_kthreads_running) nbcon_kthread_stop(con); nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ if (!(con->flags & CON_BOOT)) kfree(con->pbufs); con->pbufs = NULL; } /** * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe * section * @con: The nbcon console to acquire * * Context: Under the locking mechanism implemented in * @con->device_lock() including disabling migration. * Return: True if the console was acquired. False otherwise. 
* * Console drivers will usually use their own internal synchronization * mechanism to synchronize between console printing and non-printing * activities (such as setting baud rates). However, nbcon console drivers * supporting atomic consoles may also want to mark unsafe sections when * performing non-printing activities in order to synchronize against their * write_atomic() callback. * * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL * and marks it unsafe for handover/takeover. */ bool nbcon_device_try_acquire(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); cant_migrate(); memset(ctxt, 0, sizeof(*ctxt)); ctxt->console = con; ctxt->prio = NBCON_PRIO_NORMAL; if (!nbcon_context_try_acquire(ctxt)) return false; if (!nbcon_context_enter_unsafe(ctxt)) return false; return true; } EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); /** * nbcon_device_release - Exit unsafe section and release the nbcon console * @con: The nbcon console acquired in nbcon_device_try_acquire() */ void nbcon_device_release(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); struct console_flush_type ft; int cookie; if (!nbcon_context_exit_unsafe(ctxt)) return; nbcon_context_release(ctxt); /* * This context must flush any new records added while the console * was locked if the printer thread is not available to do it. The * console_srcu_read_lock must be taken to ensure the console is * usable throughout flushing. */ cookie = console_srcu_read_lock(); printk_get_console_flush_type(&ft); if (console_is_usable(con, console_srcu_read_flags(con), true) && !ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { /* * If nbcon_atomic flushing is not available, fall back to * using the legacy loop. */ if (ft.nbcon_atomic) { __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); } else if (ft.legacy_direct) { if (console_trylock()) console_unlock(); } else if (ft.legacy_offload) { printk_trigger_flush(); } } console_srcu_read_unlock(cookie); } EXPORT_SYMBOL_GPL(nbcon_device_release); |
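The exported helpers above (nbcon_enter_unsafe(), nbcon_exit_unsafe(), nbcon_can_proceed(), nbcon_reacquire_nobuf()) are what a console driver is expected to call from its write callbacks. The sketch below is illustrative only and not taken from any in-tree driver; the my_uart_tx_char() helper and the surrounding hardware handling are assumptions used purely to show the call pattern.

/*
 * Hypothetical write_atomic() callback of an nbcon console driver.
 * my_uart_tx_char() is an assumed helper, not a real kernel API.
 */
static void my_uart_write_atomic(struct console *con,
				 struct nbcon_write_context *wctxt)
{
	unsigned int i;

	/* Hardware access starts: mark the section unsafe for takeover. */
	if (!nbcon_enter_unsafe(wctxt))
		return;	/* Ownership was lost; back out immediately. */

	for (i = 0; i < wctxt->len; i++)
		my_uart_tx_char(con, wctxt->outbuf[i]);	/* assumed helper */

	/*
	 * Exiting the unsafe section offers a friendly handover point to a
	 * waiting higher priority context. A false return means ownership
	 * is gone and this context must not touch the hardware again.
	 */
	nbcon_exit_unsafe(wctxt);
}

On a false return from either call the driver simply returns; the nbcon core detects the lost ownership afterwards by checking @wctxt->outbuf, which nbcon_enter_unsafe() clears on failure (see nbcon_emit_next_record() above).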
// SPDX-License-Identifier: GPL-2.0-or-later /* * vimc-capture.c Virtual Media Controller Driver * * Copyright (C) 2015-2017 Helen Koike <helen.fornazier@gmail.com> */ #include <media/v4l2-ioctl.h> #include <media/videobuf2-core.h> #include <media/videobuf2-dma-contig.h> #include <media/videobuf2-vmalloc.h> #include "vimc-common.h" #include "vimc-streamer.h" struct vimc_capture_device { struct vimc_ent_device ved; struct video_device vdev; struct v4l2_pix_format format; struct vb2_queue queue; struct list_head buf_list; /* * NOTE: in a real driver, a spin lock must be used to access the * queue because the frames are generated from a hardware interrupt * and the ISR is not allowed to sleep.
* Even if it is not necessary a spinlock in the vimc driver, we * use it here as a code reference */ spinlock_t qlock; struct mutex lock; u32 sequence; struct vimc_stream stream; struct media_pad pad; }; static const struct v4l2_pix_format fmt_default = { .width = 640, .height = 480, .pixelformat = V4L2_PIX_FMT_RGB24, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_SRGB, }; struct vimc_capture_buffer { /* * struct vb2_v4l2_buffer must be the first element * the videobuf2 framework will allocate this struct based on * buf_struct_size and use the first sizeof(struct vb2_buffer) bytes of * memory as a vb2_buffer */ struct vb2_v4l2_buffer vb2; struct list_head list; }; static int vimc_capture_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, VIMC_PDEV_NAME, sizeof(cap->driver)); strscpy(cap->card, KBUILD_MODNAME, sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", VIMC_PDEV_NAME); return 0; } static void vimc_capture_get_format(struct vimc_ent_device *ved, struct v4l2_pix_format *fmt) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); *fmt = vcapture->format; } static int vimc_capture_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); f->fmt.pix = vcapture->format; return 0; } static int vimc_capture_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format *format = &f->fmt.pix; const struct vimc_pix_map *vpix; format->width = clamp_t(u32, format->width, VIMC_FRAME_MIN_WIDTH, VIMC_FRAME_MAX_WIDTH) & ~1; format->height = clamp_t(u32, format->height, VIMC_FRAME_MIN_HEIGHT, VIMC_FRAME_MAX_HEIGHT) & ~1; /* Don't accept a pixelformat that is not on the table */ vpix = vimc_pix_map_by_pixelformat(format->pixelformat); if (!vpix) { format->pixelformat = fmt_default.pixelformat; vpix = vimc_pix_map_by_pixelformat(format->pixelformat); } /* TODO: Add support for custom bytesperline values */ format->bytesperline = format->width * vpix->bpp; format->sizeimage = format->bytesperline * format->height; if (format->field == V4L2_FIELD_ANY) format->field = fmt_default.field; vimc_colorimetry_clamp(format); if (format->colorspace == V4L2_COLORSPACE_DEFAULT) format->colorspace = fmt_default.colorspace; return 0; } static int vimc_capture_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); int ret; /* Do not change the format while stream is on */ if (vb2_is_busy(&vcapture->queue)) return -EBUSY; ret = vimc_capture_try_fmt_vid_cap(file, priv, f); if (ret) return ret; dev_dbg(vcapture->ved.dev, "%s: format update: " "old:%dx%d (0x%x, %d, %d, %d, %d) " "new:%dx%d (0x%x, %d, %d, %d, %d)\n", vcapture->vdev.name, /* old */ vcapture->format.width, vcapture->format.height, vcapture->format.pixelformat, vcapture->format.colorspace, vcapture->format.quantization, vcapture->format.xfer_func, vcapture->format.ycbcr_enc, /* new */ f->fmt.pix.width, f->fmt.pix.height, f->fmt.pix.pixelformat, f->fmt.pix.colorspace, f->fmt.pix.quantization, f->fmt.pix.xfer_func, f->fmt.pix.ycbcr_enc); vcapture->format = f->fmt.pix; return 0; } static int vimc_capture_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { const struct vimc_pix_map *vpix; if (f->mbus_code) { if (f->index > 0) return -EINVAL; vpix = vimc_pix_map_by_code(f->mbus_code); } else { vpix = vimc_pix_map_by_index(f->index); } if (!vpix) 
return -EINVAL; f->pixelformat = vpix->pixelformat; return 0; } static int vimc_capture_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { const struct vimc_pix_map *vpix; if (fsize->index) return -EINVAL; /* Only accept code in the pix map table */ vpix = vimc_pix_map_by_code(fsize->pixel_format); if (!vpix) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; fsize->stepwise.min_width = VIMC_FRAME_MIN_WIDTH; fsize->stepwise.max_width = VIMC_FRAME_MAX_WIDTH; fsize->stepwise.min_height = VIMC_FRAME_MIN_HEIGHT; fsize->stepwise.max_height = VIMC_FRAME_MAX_HEIGHT; fsize->stepwise.step_width = 1; fsize->stepwise.step_height = 1; return 0; } static const struct v4l2_file_operations vimc_capture_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = vb2_fop_mmap, }; static const struct v4l2_ioctl_ops vimc_capture_ioctl_ops = { .vidioc_querycap = vimc_capture_querycap, .vidioc_g_fmt_vid_cap = vimc_capture_g_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vimc_capture_s_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vimc_capture_try_fmt_vid_cap, .vidioc_enum_fmt_vid_cap = vimc_capture_enum_fmt_vid_cap, .vidioc_enum_framesizes = vimc_capture_enum_framesizes, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, .vidioc_remove_bufs = vb2_ioctl_remove_bufs, }; static void vimc_capture_return_all_buffers(struct vimc_capture_device *vcapture, enum vb2_buffer_state state) { struct vimc_capture_buffer *vbuf, *node; spin_lock(&vcapture->qlock); list_for_each_entry_safe(vbuf, node, &vcapture->buf_list, list) { list_del(&vbuf->list); vb2_buffer_done(&vbuf->vb2.vb2_buf, state); } spin_unlock(&vcapture->qlock); } static int vimc_capture_start_streaming(struct vb2_queue *vq, unsigned int count) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); int ret; vcapture->sequence = 0; /* Start the media pipeline */ ret = video_device_pipeline_start(&vcapture->vdev, &vcapture->stream.pipe); if (ret) { vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } ret = vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 1); if (ret) { video_device_pipeline_stop(&vcapture->vdev); vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } return 0; } /* * Stop the stream engine. Any remaining buffers in the stream queue are * dequeued and passed on to the vb2 framework marked as STATE_ERROR. 
*/ static void vimc_capture_stop_streaming(struct vb2_queue *vq) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 0); /* Stop the media pipeline */ video_device_pipeline_stop(&vcapture->vdev); /* Release all active buffers */ vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_ERROR); } static void vimc_capture_buf_queue(struct vb2_buffer *vb2_buf) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb2_buf->vb2_queue); struct vimc_capture_buffer *buf = container_of(vb2_buf, struct vimc_capture_buffer, vb2.vb2_buf); spin_lock(&vcapture->qlock); list_add_tail(&buf->list, &vcapture->buf_list); spin_unlock(&vcapture->qlock); } static int vimc_capture_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); if (*nplanes) return sizes[0] < vcapture->format.sizeimage ? -EINVAL : 0; /* We don't support multiplanes for now */ *nplanes = 1; sizes[0] = vcapture->format.sizeimage; return 0; } static int vimc_capture_buffer_prepare(struct vb2_buffer *vb) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = vcapture->format.sizeimage; if (vb2_plane_size(vb, 0) < size) { dev_err(vcapture->ved.dev, "%s: buffer too small (%lu < %lu)\n", vcapture->vdev.name, vb2_plane_size(vb, 0), size); return -EINVAL; } return 0; } static const struct vb2_ops vimc_capture_qops = { .start_streaming = vimc_capture_start_streaming, .stop_streaming = vimc_capture_stop_streaming, .buf_queue = vimc_capture_buf_queue, .queue_setup = vimc_capture_queue_setup, .buf_prepare = vimc_capture_buffer_prepare, }; static const struct media_entity_operations vimc_capture_mops = { .link_validate = vimc_vdev_link_validate, }; static void vimc_capture_release(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); media_entity_cleanup(vcapture->ved.ent); kfree(vcapture); } static void vimc_capture_unregister(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); vb2_video_unregister_device(&vcapture->vdev); } static void *vimc_capture_process_frame(struct vimc_ent_device *ved, const void *frame) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); struct vimc_capture_buffer *vimc_buf; void *vbuf; spin_lock(&vcapture->qlock); /* Get the first entry of the list */ vimc_buf = list_first_entry_or_null(&vcapture->buf_list, typeof(*vimc_buf), list); if (!vimc_buf) { spin_unlock(&vcapture->qlock); return ERR_PTR(-EAGAIN); } /* Remove this entry from the list */ list_del(&vimc_buf->list); spin_unlock(&vcapture->qlock); /* Fill the buffer */ vimc_buf->vb2.vb2_buf.timestamp = ktime_get_ns(); vimc_buf->vb2.sequence = vcapture->sequence++; vimc_buf->vb2.field = vcapture->format.field; vbuf = vb2_plane_vaddr(&vimc_buf->vb2.vb2_buf, 0); memcpy(vbuf, frame, vcapture->format.sizeimage); /* Set it as ready */ vb2_set_plane_payload(&vimc_buf->vb2.vb2_buf, 0, vcapture->format.sizeimage); vb2_buffer_done(&vimc_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); return NULL; } static struct vimc_ent_device *vimc_capture_add(struct vimc_device *vimc, const char *vcfg_name) { struct v4l2_device *v4l2_dev = &vimc->v4l2_dev; const struct vimc_pix_map *vpix; struct vimc_capture_device *vcapture; struct video_device *vdev; struct vb2_queue *q; int ret; 
/* Allocate the vimc_capture_device struct */ vcapture = kzalloc(sizeof(*vcapture), GFP_KERNEL); if (!vcapture) return ERR_PTR(-ENOMEM); /* Initialize the media entity */ vcapture->vdev.entity.name = vcfg_name; vcapture->vdev.entity.function = MEDIA_ENT_F_IO_V4L; vcapture->pad.flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(&vcapture->vdev.entity, 1, &vcapture->pad); if (ret) goto err_free_vcapture; /* Initialize the lock */ mutex_init(&vcapture->lock); /* Initialize the vb2 queue */ q = &vcapture->queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_DMABUF; if (vimc_allocator == VIMC_ALLOCATOR_VMALLOC) q->io_modes |= VB2_USERPTR; q->drv_priv = vcapture; q->buf_struct_size = sizeof(struct vimc_capture_buffer); q->ops = &vimc_capture_qops; q->mem_ops = vimc_allocator == VIMC_ALLOCATOR_DMA_CONTIG ? &vb2_dma_contig_memops : &vb2_vmalloc_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_reqbufs_allocation = 2; q->lock = &vcapture->lock; q->dev = v4l2_dev->dev; ret = vb2_queue_init(q); if (ret) { dev_err(vimc->mdev.dev, "%s: vb2 queue init failed (err=%d)\n", vcfg_name, ret); goto err_clean_m_ent; } /* Initialize buffer list and its lock */ INIT_LIST_HEAD(&vcapture->buf_list); spin_lock_init(&vcapture->qlock); /* Set default frame format */ vcapture->format = fmt_default; vpix = vimc_pix_map_by_pixelformat(vcapture->format.pixelformat); vcapture->format.bytesperline = vcapture->format.width * vpix->bpp; vcapture->format.sizeimage = vcapture->format.bytesperline * vcapture->format.height; /* Fill the vimc_ent_device struct */ vcapture->ved.ent = &vcapture->vdev.entity; vcapture->ved.process_frame = vimc_capture_process_frame; vcapture->ved.vdev_get_format = vimc_capture_get_format; vcapture->ved.dev = vimc->mdev.dev; /* Initialize the video_device struct */ vdev = &vcapture->vdev; vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; vdev->entity.ops = &vimc_capture_mops; vdev->release = video_device_release_empty; vdev->fops = &vimc_capture_fops; vdev->ioctl_ops = &vimc_capture_ioctl_ops; vdev->lock = &vcapture->lock; vdev->queue = q; vdev->v4l2_dev = v4l2_dev; vdev->vfl_dir = VFL_DIR_RX; strscpy(vdev->name, vcfg_name, sizeof(vdev->name)); video_set_drvdata(vdev, &vcapture->ved); /* Register the video_device with the v4l2 and the media framework */ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret) { dev_err(vimc->mdev.dev, "%s: video register failed (err=%d)\n", vcapture->vdev.name, ret); goto err_clean_m_ent; } return &vcapture->ved; err_clean_m_ent: media_entity_cleanup(&vcapture->vdev.entity); err_free_vcapture: kfree(vcapture); return ERR_PTR(ret); } const struct vimc_ent_type vimc_capture_type = { .add = vimc_capture_add, .unregister = vimc_capture_unregister, .release = vimc_capture_release }; |
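Because the capture node only wires up the standard V4L2 ioctls listed in vimc_capture_ioctl_ops, the default 640x480 RGB24 format can be negotiated from userspace with a plain VIDIOC_S_FMT. The snippet below is a minimal userspace sketch, not part of the driver; the device node path passed by the caller (e.g. "/dev/video0") is an assumption, since the actual node number depends on the system.

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

/* Open the (assumed) vimc capture node and apply the driver's default format. */
static int vimc_open_default(const char *node)
{
	struct v4l2_format fmt = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE };
	int fd = open(node, O_RDWR);

	if (fd < 0)
		return -1;

	fmt.fmt.pix.width = 640;	/* fmt_default.width */
	fmt.fmt.pix.height = 480;	/* fmt_default.height */
	fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
	fmt.fmt.pix.field = V4L2_FIELD_NONE;

	/* The driver clamps and completes the request via its try_fmt logic. */
	if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0) {
		close(fd);
		return -1;
	}

	return fd;
}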
// SPDX-License-Identifier: GPL-2.0-or-later /* * Behringer BCD2000 driver * * Copyright (C) 2014 Mario Kicherer (dev@kicherer.org) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/bitmap.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/rawmidi.h> #define PREFIX "snd-bcd2000: " #define BUFSIZE 64 static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1397, 0x00bd) }, { }, }; static const unsigned char device_cmd_prefix[] = {0x03, 0x00}; static const unsigned char bcd2000_init_sequence[] = { 0x07, 0x00, 0x00, 0x00, 0x78, 0x48, 0x1c, 0x81, 0xc4, 0x00, 0x00, 0x00, 0x5e, 0x53, 0x4a, 0xf7, 0x18, 0xfa, 0x11, 0xff, 0x6c, 0xf3, 0x90, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0xfa, 0x11, 0xff, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2, 0x34, 0x4a, 0xf7, 0x18, 0xfa, 0x11, 0xff }; struct bcd2000 { struct usb_device *dev; struct snd_card *card; struct usb_interface *intf; int card_index; int midi_out_active; struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *midi_receive_substream; struct snd_rawmidi_substream *midi_out_substream; unsigned char midi_in_buf[BUFSIZE]; unsigned char midi_out_buf[BUFSIZE]; struct urb *midi_out_urb; struct urb *midi_in_urb; struct usb_anchor anchor; }; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; static DEFINE_MUTEX(devices_mutex); static DECLARE_BITMAP(devices_used, SNDRV_CARDS); static struct usb_driver bcd2000_driver; #ifdef CONFIG_SND_DEBUG static void bcd2000_dump_buffer(const char *prefix, const char *buf, int len) { print_hex_dump(KERN_DEBUG, prefix, DUMP_PREFIX_NONE, 16, 1, buf, len,
false); } #else static void bcd2000_dump_buffer(const char *prefix, const char *buf, int len) {} #endif static int bcd2000_midi_input_open(struct snd_rawmidi_substream *substream) { return 0; } static int bcd2000_midi_input_close(struct snd_rawmidi_substream *substream) { return 0; } /* (de)register midi substream from client */ static void bcd2000_midi_input_trigger(struct snd_rawmidi_substream *substream, int up) { struct bcd2000 *bcd2k = substream->rmidi->private_data; bcd2k->midi_receive_substream = up ? substream : NULL; } static void bcd2000_midi_handle_input(struct bcd2000 *bcd2k, const unsigned char *buf, unsigned int buf_len) { unsigned int payload_length, tocopy; struct snd_rawmidi_substream *midi_receive_substream; midi_receive_substream = READ_ONCE(bcd2k->midi_receive_substream); if (!midi_receive_substream) return; bcd2000_dump_buffer(PREFIX "received from device: ", buf, buf_len); if (buf_len < 2) return; payload_length = buf[0]; /* ignore packets without payload */ if (payload_length == 0) return; tocopy = min(payload_length, buf_len-1); bcd2000_dump_buffer(PREFIX "sending to userspace: ", &buf[1], tocopy); snd_rawmidi_receive(midi_receive_substream, &buf[1], tocopy); } static void bcd2000_midi_send(struct bcd2000 *bcd2k) { int len, ret; struct snd_rawmidi_substream *midi_out_substream; BUILD_BUG_ON(sizeof(device_cmd_prefix) >= BUFSIZE); midi_out_substream = READ_ONCE(bcd2k->midi_out_substream); if (!midi_out_substream) return; /* copy command prefix bytes */ memcpy(bcd2k->midi_out_buf, device_cmd_prefix, sizeof(device_cmd_prefix)); /* * get MIDI packet and leave space for command prefix * and payload length */ len = snd_rawmidi_transmit(midi_out_substream, bcd2k->midi_out_buf + 3, BUFSIZE - 3); if (len < 0) dev_err(&bcd2k->dev->dev, "%s: snd_rawmidi_transmit error %d\n", __func__, len); if (len <= 0) return; /* set payload length */ bcd2k->midi_out_buf[2] = len; bcd2k->midi_out_urb->transfer_buffer_length = BUFSIZE; bcd2000_dump_buffer(PREFIX "sending to device: ", bcd2k->midi_out_buf, len+3); /* send packet to the BCD2000 */ ret = usb_submit_urb(bcd2k->midi_out_urb, GFP_ATOMIC); if (ret < 0) dev_err(&bcd2k->dev->dev, PREFIX "%s (%p): usb_submit_urb() failed, ret=%d, len=%d\n", __func__, midi_out_substream, ret, len); else bcd2k->midi_out_active = 1; } static int bcd2000_midi_output_open(struct snd_rawmidi_substream *substream) { return 0; } static int bcd2000_midi_output_close(struct snd_rawmidi_substream *substream) { struct bcd2000 *bcd2k = substream->rmidi->private_data; if (bcd2k->midi_out_active) { usb_kill_urb(bcd2k->midi_out_urb); bcd2k->midi_out_active = 0; } return 0; } /* (de)register midi substream from client */ static void bcd2000_midi_output_trigger(struct snd_rawmidi_substream *substream, int up) { struct bcd2000 *bcd2k = substream->rmidi->private_data; if (up) { bcd2k->midi_out_substream = substream; /* check if there is data userspace wants to send */ if (!bcd2k->midi_out_active) bcd2000_midi_send(bcd2k); } else { bcd2k->midi_out_substream = NULL; } } static void bcd2000_output_complete(struct urb *urb) { struct bcd2000 *bcd2k = urb->context; bcd2k->midi_out_active = 0; if (urb->status) dev_warn(&urb->dev->dev, PREFIX "output urb->status: %d\n", urb->status); if (urb->status == -ESHUTDOWN) return; /* check if there is more data userspace wants to send */ bcd2000_midi_send(bcd2k); } static void bcd2000_input_complete(struct urb *urb) { int ret; struct bcd2000 *bcd2k = urb->context; if (urb->status) dev_warn(&urb->dev->dev, PREFIX "input urb->status: %i\n", 
urb->status); if (!bcd2k || urb->status == -ESHUTDOWN) return; if (urb->actual_length > 0) bcd2000_midi_handle_input(bcd2k, urb->transfer_buffer, urb->actual_length); /* return URB to device */ ret = usb_submit_urb(bcd2k->midi_in_urb, GFP_ATOMIC); if (ret < 0) dev_err(&bcd2k->dev->dev, PREFIX "%s: usb_submit_urb() failed, ret=%d\n", __func__, ret); } static const struct snd_rawmidi_ops bcd2000_midi_output = { .open = bcd2000_midi_output_open, .close = bcd2000_midi_output_close, .trigger = bcd2000_midi_output_trigger, }; static const struct snd_rawmidi_ops bcd2000_midi_input = { .open = bcd2000_midi_input_open, .close = bcd2000_midi_input_close, .trigger = bcd2000_midi_input_trigger, }; static void bcd2000_init_device(struct bcd2000 *bcd2k) { int ret; init_usb_anchor(&bcd2k->anchor); usb_anchor_urb(bcd2k->midi_out_urb, &bcd2k->anchor); usb_anchor_urb(bcd2k->midi_in_urb, &bcd2k->anchor); /* copy init sequence into buffer */ memcpy(bcd2k->midi_out_buf, bcd2000_init_sequence, 52); bcd2k->midi_out_urb->transfer_buffer_length = 52; /* submit sequence */ ret = usb_submit_urb(bcd2k->midi_out_urb, GFP_KERNEL); if (ret < 0) dev_err(&bcd2k->dev->dev, PREFIX "%s: usb_submit_urb() out failed, ret=%d: ", __func__, ret); else bcd2k->midi_out_active = 1; /* pass URB to device to enable button and controller events */ ret = usb_submit_urb(bcd2k->midi_in_urb, GFP_KERNEL); if (ret < 0) dev_err(&bcd2k->dev->dev, PREFIX "%s: usb_submit_urb() in failed, ret=%d: ", __func__, ret); /* ensure initialization is finished */ usb_wait_anchor_empty_timeout(&bcd2k->anchor, 1000); } static int bcd2000_init_midi(struct bcd2000 *bcd2k) { int ret; struct snd_rawmidi *rmidi; ret = snd_rawmidi_new(bcd2k->card, bcd2k->card->shortname, 0, 1, /* output */ 1, /* input */ &rmidi); if (ret < 0) return ret; strscpy(rmidi->name, bcd2k->card->shortname, sizeof(rmidi->name)); rmidi->info_flags = SNDRV_RAWMIDI_INFO_DUPLEX; rmidi->private_data = bcd2k; rmidi->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT; snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &bcd2000_midi_output); rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT; snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &bcd2000_midi_input); bcd2k->rmidi = rmidi; bcd2k->midi_in_urb = usb_alloc_urb(0, GFP_KERNEL); bcd2k->midi_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!bcd2k->midi_in_urb || !bcd2k->midi_out_urb) { dev_err(&bcd2k->dev->dev, PREFIX "usb_alloc_urb failed\n"); return -ENOMEM; } usb_fill_int_urb(bcd2k->midi_in_urb, bcd2k->dev, usb_rcvintpipe(bcd2k->dev, 0x81), bcd2k->midi_in_buf, BUFSIZE, bcd2000_input_complete, bcd2k, 1); usb_fill_int_urb(bcd2k->midi_out_urb, bcd2k->dev, usb_sndintpipe(bcd2k->dev, 0x1), bcd2k->midi_out_buf, BUFSIZE, bcd2000_output_complete, bcd2k, 1); /* sanity checks of EPs before actually submitting */ if (usb_urb_ep_type_check(bcd2k->midi_in_urb) || usb_urb_ep_type_check(bcd2k->midi_out_urb)) { dev_err(&bcd2k->dev->dev, "invalid MIDI EP\n"); return -EINVAL; } bcd2000_init_device(bcd2k); return 0; } static void bcd2000_free_usb_related_resources(struct bcd2000 *bcd2k, struct usb_interface *interface) { usb_kill_urb(bcd2k->midi_out_urb); usb_kill_urb(bcd2k->midi_in_urb); usb_free_urb(bcd2k->midi_out_urb); usb_free_urb(bcd2k->midi_in_urb); if (bcd2k->intf) { usb_set_intfdata(bcd2k->intf, NULL); bcd2k->intf = NULL; } } static int bcd2000_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct snd_card *card; struct bcd2000 *bcd2k; unsigned int card_index; char usb_path[32]; int err; mutex_lock(&devices_mutex); for 
(card_index = 0; card_index < SNDRV_CARDS; ++card_index)
		if (!test_bit(card_index, devices_used))
			break;

	if (card_index >= SNDRV_CARDS) {
		mutex_unlock(&devices_mutex);
		return -ENOENT;
	}

	err = snd_card_new(&interface->dev, index[card_index], id[card_index],
			THIS_MODULE, sizeof(*bcd2k), &card);
	if (err < 0) {
		mutex_unlock(&devices_mutex);
		return err;
	}

	bcd2k = card->private_data;
	bcd2k->dev = interface_to_usbdev(interface);
	bcd2k->card = card;
	bcd2k->card_index = card_index;
	bcd2k->intf = interface;

	snd_card_set_dev(card, &interface->dev);

	strscpy(card->driver, "snd-bcd2000", sizeof(card->driver));
	strscpy(card->shortname, "BCD2000", sizeof(card->shortname));
	usb_make_path(bcd2k->dev, usb_path, sizeof(usb_path));
	snprintf(bcd2k->card->longname, sizeof(bcd2k->card->longname),
		 "Behringer BCD2000 at %s", usb_path);

	err = bcd2000_init_midi(bcd2k);
	if (err < 0)
		goto probe_error;

	err = snd_card_register(card);
	if (err < 0)
		goto probe_error;

	usb_set_intfdata(interface, bcd2k);
	set_bit(card_index, devices_used);

	mutex_unlock(&devices_mutex);

	return 0;

probe_error:
	dev_info(&bcd2k->dev->dev, PREFIX "error during probing");
	bcd2000_free_usb_related_resources(bcd2k, interface);
	snd_card_free(card);
	mutex_unlock(&devices_mutex);

	return err;
}

static void bcd2000_disconnect(struct usb_interface *interface)
{
	struct bcd2000 *bcd2k = usb_get_intfdata(interface);

	if (!bcd2k)
		return;

	mutex_lock(&devices_mutex);

	/* make sure that userspace cannot create new requests */
	snd_card_disconnect(bcd2k->card);

	bcd2000_free_usb_related_resources(bcd2k, interface);

	clear_bit(bcd2k->card_index, devices_used);

	snd_card_free_when_closed(bcd2k->card);

	mutex_unlock(&devices_mutex);
}

static struct usb_driver bcd2000_driver = {
	.name =		"snd-bcd2000",
	.probe =	bcd2000_probe,
	.disconnect =	bcd2000_disconnect,
	.id_table =	id_table,
};

module_usb_driver(bcd2000_driver);

MODULE_DEVICE_TABLE(usb, id_table);
MODULE_AUTHOR("Mario Kicherer, dev@kicherer.org");
MODULE_DESCRIPTION("Behringer BCD2000 driver");
MODULE_LICENSE("GPL");
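/*
 * Illustrative sketch only, not part of the driver above: a plain C
 * demonstration of the packet framing implied by bcd2000_midi_send()
 * and bcd2000_midi_handle_input().  Outgoing packets carry a command
 * prefix, the payload length at offset 2 and the raw MIDI bytes from
 * offset 3; incoming packets carry the payload length in byte 0 and
 * the payload from byte 1.  The demo_* names, the prefix bytes and
 * DEMO_BUFSIZE are placeholders, not the driver's real
 * device_cmd_prefix or BUFSIZE values.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_BUFSIZE 64

static const unsigned char demo_cmd_prefix[3] = { 0x00, 0x00, 0x00 }; /* placeholder bytes */

/* Frame a raw MIDI message the way bcd2000_midi_send() lays out its URB buffer. */
static size_t demo_frame_out(unsigned char *pkt, const unsigned char *midi, size_t len)
{
	if (len > DEMO_BUFSIZE - 3)
		len = DEMO_BUFSIZE - 3;

	memcpy(pkt, demo_cmd_prefix, sizeof(demo_cmd_prefix));	/* command prefix */
	pkt[2] = (unsigned char)len;				/* payload length */
	memcpy(pkt + 3, midi, len);				/* MIDI payload */

	return len + 3;
}

/* Extract the MIDI payload the way bcd2000_midi_handle_input() does. */
static size_t demo_parse_in(const unsigned char *pkt, size_t pkt_len,
			    unsigned char *midi, size_t midi_size)
{
	size_t payload;

	if (pkt_len < 2)
		return 0;

	payload = pkt[0];		/* byte 0: payload length */
	if (payload == 0)
		return 0;		/* ignore packets without payload */

	if (payload > pkt_len - 1)
		payload = pkt_len - 1;	/* clamp to what was actually received */
	if (payload > midi_size)
		payload = midi_size;

	memcpy(midi, pkt + 1, payload);	/* payload starts at byte 1 */
	return payload;
}

int main(void)
{
	const unsigned char note_on[3] = { 0x90, 0x3c, 0x7f };	/* Note On, middle C */
	unsigned char pkt[DEMO_BUFSIZE], midi[DEMO_BUFSIZE];
	size_t n, m, i;

	n = demo_frame_out(pkt, note_on, sizeof(note_on));
	for (i = 0; i < n; i++)
		printf("%02x ", pkt[i]);
	printf(" <- framed for the device\n");

	/* Bytes 2.. of the outgoing layout match the input layout, so reuse them. */
	m = demo_parse_in(pkt + 2, n - 2, midi, sizeof(midi));
	printf("parsed %zu MIDI byte(s) back out\n", m);

	return 0;
}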
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP module.
 *
 * Version:	@(#)tcp.h	1.0.5	05/23/93
 *
 * Authors:	Ross Biro
 *		Fred N.
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _TCP_H #define _TCP_H #define FASTRETRANS_DEBUG 1 #include <linux/list.h> #include <linux/tcp.h> #include <linux/bug.h> #include <linux/slab.h> #include <linux/cache.h> #include <linux/percpu.h> #include <linux/skbuff.h> #include <linux/kref.h> #include <linux/ktime.h> #include <linux/indirect_call_wrapper.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> #include <net/inet_hashtables.h> #include <net/checksum.h> #include <net/request_sock.h> #include <net/sock_reuseport.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> #include <net/tcp_states.h> #include <net/tcp_ao.h> #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> #include <linux/bpf-cgroup.h> #include <linux/siphash.h> extern struct inet_hashinfo tcp_hashinfo; DECLARE_PER_CPU(unsigned int, tcp_orphan_count); int tcp_orphan_count_sum(void); DECLARE_PER_CPU(u32, tcp_tw_isn); void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) /* * Never offer a window over 32767 without using window scaling. Some * poor stacks do signed 16bit maths! */ #define MAX_TCP_WINDOW 32767U /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U /* The initial MTU to use for probing */ #define TCP_BASE_MSS 1024 /* probing interval, default to 10 minutes as per RFC4821 */ #define TCP_PROBE_INTERVAL 600 /* Specify interval when tcp mtu probing will stop */ #define TCP_PROBE_THRESHOLD 8 /* After receiving this amount of duplicate ACKs fast retransmit starts. */ #define TCP_FASTRETRANS_THRESH 3 /* Maximal number of ACKs sent quickly to accelerate slow-start. */ #define TCP_MAX_QUICKACKS 16U /* Maximal number of window scale according to RFC1323 */ #define TCP_MAX_WSCALE 14U /* urg_data states */ #define TCP_URG_VALID 0x0100 #define TCP_URG_NOTYET 0x0200 #define TCP_URG_READ 0x0400 #define TCP_RETR1 3 /* * This is how many retries it does before it * tries to figure out if the gateway is * down. Minimal RFC value is 3; it corresponds * to ~3sec-8min depending on RTO. */ #define TCP_RETR2 15 /* * This should take at least * 90 minutes to time out. * RFC1122 says that the limit is 100 sec. * 15 is ~13-30min depending on RTO. */ #define TCP_SYN_RETRIES 6 /* This is how many retries are done * when active opening a connection. * RFC1122 says the minimum retry MUST * be at least 180secs. Nevertheless * this value is corresponding to * 63secs of retransmission with the * current initial RTO. */ #define TCP_SYNACK_RETRIES 5 /* This is how may retries are done * when passive opening a connection. * This is corresponding to 31secs of * retransmission with the current * initial RTO. */ #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN /* BSD style FIN_WAIT2 deadlock breaker. * It used to be 3min, new value is 60sec, * to combine FIN-WAIT-2 timeout with * TIME-WAIT timer. 
*/ #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) #else #define TCP_DELACK_MIN 4U #define TCP_ATO_MIN 4U #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the * initial data transmission if no * valid RTT sample has been acquired, * most likely due to retrans in 3WHS. */ #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) #define MAX_TCP_KEEPIDLE 32767 #define MAX_TCP_KEEPINTVL 32767 #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 /* Ensure that TCP PAWS checks are relaxed after ~2147 seconds * to avoid overflows. This assumes a clock smaller than 1 Mhz. * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz. */ #define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC) #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal * (or greater than) TCP_TIMEWAIT_LEN * to provide reliability equal to one * provided by timewait state. */ #define TCP_PAWS_WINDOW 1 /* Replay window for per-host * timestamps. It must be less than * minimal timewait lifetime. */ /* * TCP option */ #define TCPOPT_NOP 1 /* Padding */ #define TCPOPT_EOL 0 /* End of options */ #define TCPOPT_MSS 2 /* Segment size negotiating */ #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_SACK_PERM 4 /* SACK Permitted */ #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ #define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. See draft-ietf-tcpm-experimental-options-00.txt */ #define TCPOPT_FASTOPEN_MAGIC 0xF989 #define TCPOPT_SMC_MAGIC 0xE2D4C3D9 /* * TCP option lengths */ #define TCPOLEN_MSS 4 #define TCPOLEN_WINDOW 3 #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 #define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_EXP_SMC_BASE 6 /* But this is what stacks really send out. 
*/ #define TCPOLEN_TSTAMP_ALIGNED 12 #define TCPOLEN_WSCALE_ALIGNED 4 #define TCPOLEN_SACKPERM_ALIGNED 4 #define TCPOLEN_SACK_BASE 2 #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 #define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ /* TCP thin-stream limits */ #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ /* TCP initial congestion window as per rfc6928 */ #define TCP_INIT_CWND 10 /* Bit Flags for sysctl_tcp_fastopen */ #define TFO_CLIENT_ENABLE 1 #define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ /* Accept SYN data w/o any cookie option */ #define TFO_SERVER_COOKIE_NOT_REQD 0x200 /* Force enable TFO on all listeners, i.e., not requiring the * TCP_FASTOPEN socket option. */ #define TFO_SERVER_WO_SOCKOPT1 0x400 /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ #define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ extern atomic_long_t tcp_memory_allocated; DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { if (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; return READ_ONCE(tcp_memory_pressure); } /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). */ static inline bool before(__u32 seq1, __u32 seq2) { return (__s32)(seq1-seq2) < 0; } #define after(seq2, seq1) before(seq1, seq2) /* is s2<=s1<=s3 ? 
*/ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3) { return seq3 - seq2 >= seq1 - seq2; } static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sk_wmem_queued_add(sk, -skb->truesize); if (!skb_zcopy_pure(skb)) sk_mem_uncharge(sk, skb->truesize); else sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb))); __kfree_skb(skb); } void sk_forced_mem_schedule(struct sock *sk, int size); bool tcp_check_oom(const struct sock *sk, int shift); extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) void tcp_tasklet_init(void); int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); void tcp_remove_empty_skb(struct sock *sk); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size, struct ubuf_info *uarg); void tcp_splice_eof(struct socket *sock); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); int tcp_wmem_schedule(struct sock *sk, int copy); void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int size_goal); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); void tcp_twsk_purge(struct list_head *net_exit_list); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule); static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { /* How many ACKs S/ACKing new data have we sent? */ const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. 
*/ icsk->icsk_ack.ato = TCP_ATO_MIN; } else icsk->icsk_ack.quick -= pkts; } } #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 enum tcp_tw_status { TCP_TW_SUCCESS = 0, TCP_TW_RST = 1, TCP_TW_ACK = 2, TCP_TW_SYN = 3 }; enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th, u32 *tw_isn); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void __tcp_close(struct sock *sk, long timeout); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb); __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int do_tcp_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); bool tcp_bpf_bypass_getsockopt(int level, int optname); int do_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping_internal *tss); void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss); void tcp_data_ready(struct sock *sk); #ifdef CONFIG_MMU int tcp_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); #endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); /* * BPF SKB-less helpers */ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, struct tcphdr *th, u32 *cookie); u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie); u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss); u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct tcphdr *th); /* * TCP v4 functions exported for the inet6 API */ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); void tcp_req_err(struct sock *sk, u32 seq, bool abort); void tcp_ld_RTO_revert(struct sock *sk, u32 seq); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, struct sk_buff *skb); void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff 
*skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int tcp_connect(struct sock *sk); enum tcp_synack_type { TCP_SYNACK_NORMAL, TCP_SYNACK_FASTOPEN, TCP_SYNACK_COOKIE, }; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb, struct tcp_options_received *tcp_opt, int mss, u32 tsoff); #if IS_ENABLED(CONFIG_BPF) struct bpf_tcp_req_attrs { u32 rcv_tsval; u32 rcv_tsecr; u16 mss; u8 rcv_wscale; u8 snd_wscale; u8 ecn_ok; u8 wscale_ok; u8 sack_ok; u8 tstamp_ok; u8 usec_ts_ok; u8 reserved[3]; }; #endif #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. * This counter is used both as a hash input and partially encoded into * the cookie value. A cookie is only validated further if the delta * between the current counter value and the encoded one is less than this, * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ #define MAX_SYNCOOKIE_AGE 2 #define TCP_SYNCOOKIE_PERIOD (60 * HZ) #define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) /* syncookies: remember time of last synqueue overflow * But do not dirty this field too often (once per second is enough) * It is racy as we do not hold a lock, but race is very minor. */ static inline void tcp_synq_overflow(const struct sock *sk) { unsigned int last_overflow; unsigned int now = jiffies; if (sk->sk_reuseport) { struct sock_reuseport *reuse; reuse = rcu_dereference(sk->sk_reuseport_cb); if (likely(reuse)) { last_overflow = READ_ONCE(reuse->synq_overflow_ts); if (!time_between32(now, last_overflow, last_overflow + HZ)) WRITE_ONCE(reuse->synq_overflow_ts, now); return; } } last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); if (!time_between32(now, last_overflow, last_overflow + HZ)) WRITE_ONCE(tcp_sk_rw(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { unsigned int last_overflow; unsigned int now = jiffies; if (sk->sk_reuseport) { struct sock_reuseport *reuse; reuse = rcu_dereference(sk->sk_reuseport_cb); if (likely(reuse)) { last_overflow = READ_ONCE(reuse->synq_overflow_ts); return !time_between32(now, last_overflow - HZ, last_overflow + TCP_SYNCOOKIE_VALID); } } last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, * then we're under synflood. However, we have to use * 'last_overflow - HZ' as lower bound. 
That's because a concurrent * tcp_synq_overflow() could update .ts_recent_stamp after we read * jiffies but before we store .ts_recent_stamp into last_overflow, * which could lead to rejecting a valid syncookie. */ return !time_between32(now, last_overflow - HZ, last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) { u64 val = get_jiffies_64(); do_div(val, TCP_SYNCOOKIE_PERIOD); return val; } /* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val) { if (usec_ts) return div_u64(val, NSEC_PER_USEC); return div_u64(val, NSEC_PER_MSEC); } u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); u64 cookie_init_timestamp(struct request_sock *req, u64 now); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst) { return READ_ONCE(net->ipv4.sysctl_tcp_ecn) || dst_feature(dst, RTAX_FEATURE_ECN); } #if IS_ENABLED(CONFIG_BPF) static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; } struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb); #else static inline bool cookie_bpf_ok(struct sk_buff *skb) { return false; } static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, struct sk_buff *skb) { return NULL; } #endif /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb); void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); enum tcp_queue { TCP_FRAG_IN_WRITE_QUEUE, TCP_FRAG_IN_RTX_QUEUE, }; int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, struct sk_buff *skb, u32 len, unsigned int mss_now, gfp_t gfp); void tcp_send_probe0(struct sock *); int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority, enum sk_rst_reason reason); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto); void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); void 
tcp_check_space(struct sock *sk); void tcp_sack_compress_send_ack(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1) __sock_put(sk); if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1) __sock_put(sk); inet_csk_clear_xmit_timers(sk); } unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); unsigned int tcp_current_mss(struct sock *sk); u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { int cutoff; /* When peer uses tiny windows, there is no use in packetizing * to sub-MSS pieces for the sake of SWS or making sure there * are enough packets in the pipe for fast recovery. * * On the other hand, for extremely large MSS devices, handling * smaller than MSS windows in this way does make sense. */ if (tp->max_window > TCP_MSS_DEFAULT) cutoff = (tp->max_window >> 1); else cutoff = tp->max_window; if (cutoff && pktsize > cutoff) return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); void tcp_read_done(struct sock *sk, size_t len); void tcp_initialize_rcv_mss(struct sock *sk); int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); static inline void tcp_bound_rto(struct sock *sk) { if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } static inline u32 __tcp_set_rto(const struct tcp_sock *tp) { return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { /* mptcp hooks are only on the slow path */ if (sk_is_mptcp((struct sock *)tp)) return; tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); } static inline void tcp_fast_path_on(struct tcp_sock *tp) { __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); } static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) tcp_fast_path_on(tp); } u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(const struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); u32 rto_min = inet_csk(sk)->icsk_rto_min; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); return rto_min; } static inline u32 tcp_rto_min_us(const struct sock *sk) { return jiffies_to_usecs(tcp_rto_min(sk)); } static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) { return dst_metric_locked(dst, RTAX_CC_ALGO); } /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. 
*/ static inline u32 tcp_receive_window(const struct tcp_sock *tp) { s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; if (win < 0) win = 0; return (u32) win; } /* Choose a new window, without checks for shrinking, and without * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. */ u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); /* TCP uses 32bit jiffies to save some space. * Note that this is different from tcp_time_stamp, which * historically has been the same until linux-4.13. */ #define tcp_jiffies32 ((u32)jiffies) /* * Deliver a 32bit value for TCP timestamp option (RFC 7323) * It is no longer tied to jiffies, but to 1 ms clock. * Note: double check if you want to use tcp_jiffies32 instead of this. */ #define TCP_TS_HZ 1000 static inline u64 tcp_clock_ns(void) { return ktime_get_ns(); } static inline u64 tcp_clock_us(void) { return div_u64(tcp_clock_ns(), NSEC_PER_USEC); } static inline u64 tcp_clock_ms(void) { return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); } /* TCP Timestamp included in TS option (RFC 1323) can either use ms * or usec resolution. Each socket carries a flag to select one or other * resolution, as the route attribute could change anytime. * Each flow must stick to initial resolution. */ static inline u32 tcp_clock_ts(bool usec_ts) { return usec_ts ? tcp_clock_us() : tcp_clock_ms(); } static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) { return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); } static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) { if (tp->tcp_usec_ts) return tp->tcp_mstamp; return tcp_time_stamp_ms(tp); } void tcp_mstamp_refresh(struct tcp_sock *tp); static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) { return max_t(s64, t1 - t0, 0); } /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } /* Provide skb TSval in usec or ms unit */ static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) { if (usec_ts) return tcp_skb_timestamp_us(skb); return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC); } static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) { return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset; } static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) { return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off; } #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) #define TCPHDR_FIN 0x01 #define TCPHDR_SYN 0x02 #define TCPHDR_RST 0x04 #define TCPHDR_PSH 0x08 #define TCPHDR_ACK 0x10 #define TCPHDR_URG 0x20 #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 #define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) /* State flags for sacked in struct tcp_skb_cb */ enum tcp_skb_cb_sacked_flags { TCPCB_SACKED_ACKED = (1 << 0), /* SKB ACK'd by a SACK block */ TCPCB_SACKED_RETRANS = (1 << 1), /* SKB retransmitted */ TCPCB_LOST = (1 << 2), /* SKB is lost */ TCPCB_TAGBITS = (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS | TCPCB_LOST), /* All tag bits */ TCPCB_REPAIRED = (1 << 4), /* SKB repaired (no skb_mstamp_ns) */ TCPCB_EVER_RETRANS = (1 << 7), /* Ever retransmitted frame */ TCPCB_RETRANS = (TCPCB_SACKED_RETRANS | TCPCB_EVER_RETRANS | TCPCB_REPAIRED), }; /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. * This is 44 bytes if IPV6 is enabled. 
* If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately. */ struct tcp_skb_cb { __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ union { /* Note : * tcp_gso_segs/size are used in write queue only, * cf tcp_skb_pcount()/tcp_skb_mss() */ struct { u16 tcp_gso_segs; u16 tcp_gso_size; }; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ __u8 sacked; /* State flags for SACK. */ __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ eor:1, /* Is skb MSG_EOR marked? */ has_rxtstamp:1, /* SKB has a RX timestamp */ unused:5; __u32 ack_seq; /* Sequence number ACK'd */ union { struct { #define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1) /* There is space for up to 24 bytes */ __u32 is_app_limited:1, /* cwnd not fully used? */ delivered_ce:20, unused:11; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ u64 first_tx_mstamp; /* when we reached the "delivered" count */ u64 delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ }; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) extern const struct inet_connection_sock_af_ops ipv4_specific; #if IS_ENABLED(CONFIG_IPV6) /* This is the variant of inet6_iif() that must be used by TCP, * as TCP moves IP6CB into a different location in skb->cb[] */ static inline int tcp_v6_iif(const struct sk_buff *skb) { return TCP_SKB_CB(skb)->header.h6.iif; } static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb) { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } /* TCP_SKB_CB reference means this can not be used from early demux */ static inline int tcp_v6_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) return TCP_SKB_CB(skb)->header.h6.iif; #endif return 0; } extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); void tcp_v6_early_demux(struct sk_buff *skb); #endif /* TCP_SKB_CB reference means this can not be used from early demux */ static inline int tcp_v4_sdif(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return TCP_SKB_CB(skb)->header.h4.iif; #endif return 0; } /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ static inline int tcp_skb_pcount(const struct sk_buff *skb) { return TCP_SKB_CB(skb)->tcp_gso_segs; } static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs) { TCP_SKB_CB(skb)->tcp_gso_segs = segs; } static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs) { TCP_SKB_CB(skb)->tcp_gso_segs += segs; } /* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. 
*/ static inline int tcp_skb_mss(const struct sk_buff *skb) { return TCP_SKB_CB(skb)->tcp_gso_size; } static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) { return likely(!TCP_SKB_CB(skb)->eor); } static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && skb_pure_zcopy_same(to, from) && skb_frags_readable(to) == skb_frags_readable(from)); } static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, const struct sk_buff *from) { return likely(mptcp_skb_can_collapse(to, from) && !skb_cmp_decrypted(to, from)); } /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* * Interface for adding new TCP congestion control handlers */ #define TCP_CA_NAME_MAX 16 #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) #define TCP_CA_UNSPEC 0 /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) union tcp_cc_info; struct ack_sample { u32 pkts_acked; s32 rtt_us; u32 in_flight; }; /* A rate sample measures the number of (original/retransmitted) data * packets delivered "delivered" over an interval of time "interval_us". * The tcp_rate.c code fills in the rate sample, and congestion * control modules that define a cong_control function to run at the end * of ACK processing can optionally chose to consult this sample when * setting cwnd and pacing rate. * A sample is invalid if "delivered" or "interval_us" is negative. */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? 
*/ }; struct tcp_congestion_ops { /* fast path fields are put first to fill one cache line */ /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); /* do new cwnd calculation (required) */ void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* call when ack arrives (optional) */ void (*in_ack_event)(struct sock *sk, u32 flags); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* override sysctl_tcp_min_tso_segs */ u32 (*min_tso_segs)(struct sock *sk); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) */ void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); /* new value of cwnd after loss (required) */ u32 (*undo_cwnd)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); /* control/slow paths put last */ /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); char name[TCP_CA_NAME_MAX]; struct module *owner; struct list_head list; u32 key; u32 flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); /* cleanup private data (optional) */ void (*release)(struct sock *sk); } ____cacheline_aligned_in_smp; int tcp_register_congestion_control(struct tcp_congestion_ops *type); void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); int tcp_update_congestion_control(struct tcp_congestion_ops *type, struct tcp_congestion_ops *old_type); int tcp_validate_congestion_control(struct tcp_congestion_ops *ca); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); void tcp_cleanup_congestion_control(struct sock *sk); int tcp_set_default_congestion_control(struct net *net, const char *name); void tcp_get_default_congestion_control(struct net *net, char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool cap_net_admin); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) { return NULL; } #endif static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->cwnd_event) icsk->icsk_ca_ops->cwnd_event(sk, event); } /* From tcp_cong.c */ void 
tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. * * tcp_is_sack - SACK enabled * tcp_is_reno - No SACK */ static inline int tcp_is_sack(const struct tcp_sock *tp) { return likely(tp->rx_opt.sack_ok); } static inline bool tcp_is_reno(const struct tcp_sock *tp) { return !tcp_is_sack(tp); } static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; } /* This determines how many packets are "in the network" to the best * of our knowledge. In many cases it is conservative, but where * detailed information is available from the receiver (via SACK * blocks etc.) we can make more aggressive calculations. * * Use this for decisions involving congestion control, use just * tp->packets_out to determine if the send queue is empty or not. * * Read this equation as: * * "Packets sent once on transmission queue" MINUS * "Packets left network, but not honestly ACKed yet" PLUS * "Packets fast retransmitted" */ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } #define TCP_INFINITE_SSTHRESH 0x7fffffff static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) { return tp->snd_cwnd; } static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) { WARN_ON_ONCE((int)val <= 0); tp->snd_cwnd = val; } static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { return tcp_snd_cwnd(tp) < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) { return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; } static inline bool tcp_in_cwnd_reduction(const struct sock *sk) { return (TCPF_CA_CWR | TCPF_CA_Recovery) & (1 << inet_csk(sk)->icsk_ca_state); } /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is cwnd reduction phase, when cwnd is decreasing towards * ssthresh. */ static inline __u32 tcp_current_ssthresh(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); if (tcp_in_cwnd_reduction(sk)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, ((tcp_snd_cwnd(tp) >> 1) + (tcp_snd_cwnd(tp) >> 2))); } /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) void tcp_enter_cwr(struct sock *sk); __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst); /* The maximum number of MSS of available cwnd for which TSO defers * sending if not using sysctl_tcp_tso_win_divisor. */ static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp) { return 3; } /* Returns end sequence number of the receiver's advertised window */ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) { return tp->snd_una + tp->snd_wnd; } /* We follow the spirit of RFC2861 to validate cwnd but implement a more * flexible approach. 
The RFC suggests cwnd should not be raised unless * it was fully used previously. And that's exactly what we do in * congestion avoidance mode. But in slow start we allow cwnd to grow * as long as the application has used half the cwnd. * Example : * cwnd is 10 (IW10), but application sends 9 frames. * We allow cwnd to reach 18 when all frames are ACKed. * This check is safe because it's as aggressive as slow start which already * risks 100% overshoot. The advantage is that we discourage application to * either send more filler packets or data to artificially blow up the cwnd * usage, and allow application-limited process to probe bw more aggressively. */ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); if (tp->is_cwnd_limited) return true; /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; return false; } /* BBR congestion control needs pacing. * Same remark for SO_MAX_PACING_RATE. * sch_fq packet scheduler is efficiently handling pacing, * but is not always installed/used. * Return true if TCP stack should pace packets itself. */ static inline bool tcp_needs_internal_pacing(const struct sock *sk) { return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; } /* Estimates in how many jiffies next packet for this flow can be sent. * Scheduling a retransmit timer too early would be silly. */ static inline unsigned long tcp_pacing_delay(const struct sock *sk) { s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache; return delay > 0 ? nsecs_to_jiffies(delay) : 0; } static inline void tcp_reset_xmit_timer(struct sock *sk, const int what, unsigned long when, const unsigned long max_when) { inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk), max_when); } /* Something is really bad, we could not queue an additional packet, * because qdisc is full or receiver sent a 0 window, or we are paced. 
* We do not want to add fuel to the fire, or abort too early, * so make sure the timer we arm now is at least 200ms in the future, * regardless of current icsk_rto value (as it could be ~2ms) */ static inline unsigned long tcp_probe0_base(const struct sock *sk) { return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); } /* Variant of inet_csk_rto_backoff() used for zero window probes */ static inline unsigned long tcp_probe0_when(const struct sock *sk, unsigned long max_when) { u8 backoff = min_t(u8, ilog2(TCP_RTO_MAX / TCP_RTO_MIN) + 1, inet_csk(sk)->icsk_backoff); u64 when = (u64)tcp_probe0_base(sk) << backoff; return (unsigned long)min_t(u64, when, max_when); } static inline void tcp_check_probe_timer(struct sock *sk) { if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; } static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; } /* * Calculate(/check) TCP checksum */ static inline __sum16 tcp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline bool tcp_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && __skb_checksum_complete(skb); } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); int tcp_abort(struct sock *sk, int err); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; rx_opt->num_sacks = 0; } void tcp_cwnd_restart(struct sock *sk, s32 delta); static inline void tcp_slow_start_after_idle_check(struct sock *sk) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); s32 delta; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) tcp_cwnd_restart(sk, delta); } /* Determine a window scaling and initial window to offer. */ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); static inline int __tcp_win_from_space(u8 scaling_ratio, int space) { s64 scaled_space = (s64)space * scaling_ratio; return scaled_space >> TCP_RMEM_TO_WIN_SCALE; } static inline int tcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space); } /* inverse of __tcp_win_from_space() */ static inline int __tcp_space_from_win(u8 scaling_ratio, int win) { u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE; do_div(val, scaling_ratio); return val; } static inline int tcp_space_from_win(const struct sock *sk, int win) { return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win); } /* Assume a 50% default for skb->len/skb->truesize ratio. * This may be adjusted later in tcp_measure_rcv_mss(). 
*/ #define TCP_DEFAULT_SCALING_RATIO (1 << (TCP_RMEM_TO_WIN_SCALE - 1)) static inline void tcp_scaling_ratio_init(struct sock *sk) { tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; } /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) { __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); } void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); /* We provision sk_rcvbuf around 200% of sk_rcvlowat. * If 87.5 % (7/8) of the space has been consumed, we want to override * SO_RCVLOWAT constraint, since we are receiving skbs with too small * len/truesize ratio. */ static inline bool tcp_rmem_pressure(const struct sock *sk) { int rcvbuf, threshold; if (tcp_under_memory_pressure(sk)) return true; rcvbuf = READ_ONCE(sk->sk_rcvbuf); threshold = rcvbuf - (rcvbuf >> 3); return atomic_read(&sk->sk_rmem_alloc) > threshold; } static inline bool tcp_epollin_ready(const struct sock *sk, int target) { const struct tcp_sock *tp = tcp_sk(sk); int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); if (avail <= 0) return false; return (avail >= target) || tcp_rmem_pressure(sk) || (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss); } extern void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst); void tcp_enter_memory_pressure(struct sock *sk); void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); int val; /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl() * and do_tcp_setsockopt(). */ val = READ_ONCE(tp->keepalive_intvl); return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); int val; /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */ val = READ_ONCE(tp->keepalive_time); return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); int val; /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt() * and do_tcp_setsockopt(). */ val = READ_ONCE(tp->keepalive_probes); return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) { const struct inet_connection_sock *icsk = &tp->inet_conn; return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, tcp_jiffies32 - tp->rcv_tstamp); } static inline int tcp_fin_time(const struct sock *sk) { int fin_timeout = tcp_sk(sk)->linger2 ? 
: READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, int paws_win) { if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) return true; if (unlikely(!time_before32(ktime_get_seconds(), rx_opt->ts_recent_stamp + TCP_PAWS_WRAP))) return true; /* * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, * then following tcp messages have valid values. Ignore 0 value, * or else 'negative' tsval might forbid us to accept their packets. */ if (!rx_opt->ts_recent) return true; return false; } static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, int rst) { if (tcp_paws_check(rx_opt, 0)) return false; /* RST segments are not recommended to carry timestamp, and, if they do, it is recommended to ignore PAWS because "their cleanup function should take precedence over timestamps." Certainly, it is mistake. It is necessary to understand the reasons of this constraint to relax it: if peer reboots, clock may go out-of-sync and half-open connections will not be reset. Actually, the problem would be not existing if all the implementations followed draft about maintaining clock via reboots. Linux-2.2 DOES NOT! However, we can relax time bounds for RST segments to MSL. */ if (rst && !time_before32(ktime_get_seconds(), rx_opt->ts_recent_stamp + TCP_PAWS_MSL)) return false; return true; } bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, int mib_idx, u32 *last_oow_ack_time); static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1); TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1); } /* from STCP */ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; } static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { tcp_clear_retrans_hints_partial(tp); tp->retransmit_skb_hint = NULL; } #define tcp_md5_addr tcp_ao_addr /* - key database */ struct tcp_md5sig_key { struct hlist_node node; u8 keylen; u8 family; /* AF_INET or AF_INET6 */ u8 prefixlen; u8 flags; union tcp_md5_addr addr; int l3index; /* set if key added with L3 scope */ u8 key[TCP_MD5SIG_MAXKEYLEN]; struct rcu_head rcu; }; /* - sock block */ struct tcp_md5sig_info { struct hlist_head head; struct rcu_head rcu; }; /* - pseudo header */ struct tcp4_pseudohdr { __be32 saddr; __be32 daddr; __u8 pad; __u8 protocol; __be16 len; }; struct tcp6_pseudohdr { struct in6_addr saddr; struct in6_addr daddr; __be32 len; __be32 protocol; /* including padding */ }; union tcp_md5sum_block { struct tcp4_pseudohdr ip4; #if IS_ENABLED(CONFIG_IPV6) struct tcp6_pseudohdr ip6; #endif }; /* * struct tcp_sigpool - per-CPU pool of ahash_requests * @scratch: per-CPU temporary area, that can be used between * tcp_sigpool_start() and tcp_sigpool_end() to perform * crypto request * @req: pre-allocated ahash request */ struct tcp_sigpool { void *scratch; struct ahash_request *req; }; int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size); void tcp_sigpool_get(unsigned int id); void tcp_sigpool_release(unsigned int id); int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, const struct sk_buff *skb, unsigned int header_len); /** * tcp_sigpool_start - disable bh and start using 
tcp_sigpool_ahash * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() * @c: returned tcp_sigpool for usage (uninitialized on failure) * * Returns: 0 on success, error otherwise. */ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); /** * tcp_sigpool_end - enable bh and stop using tcp_sigpool * @c: tcp_sigpool context that was returned by tcp_sigpool_start() */ void tcp_sigpool_end(struct tcp_sigpool *c); size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, struct tcp_md5sig_key *key); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); void tcp_clear_md5_list(struct sock *sk); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family, bool any_l3index); static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family, false); } static inline struct tcp_md5sig_key * tcp_md5_do_lookup_any_l3index(const struct sock *sk, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; return __tcp_md5_do_lookup(sk, 0, addr, family, true); } #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { return NULL; } static inline struct tcp_md5sig_key * tcp_md5_do_lookup_any_l3index(const struct sock *sk, const union tcp_md5_addr *addr, int family) { return NULL; } #define tcp_twsk_md5_key(twsk) NULL #endif int tcp_md5_alloc_sigpool(void); void tcp_md5_release_sigpool(void); void tcp_md5_add_sigpool(void); extern int tcp_md5_sigpool_id; int tcp_md5_hash_key(struct tcp_sigpool *hp, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); struct tcp_fastopen_request { /* Fast Open cookie. 
Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ size_t size; int copied; /* queued in tcp_connect() */ struct ubuf_info *uarg; }; void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, u64 *key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, const struct dst_entry *dst); void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t) #define TCP_FASTOPEN_KEY_MAX 2 #define TCP_FASTOPEN_KEY_BUF_LENGTH \ (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX) /* Fastopen key context */ struct tcp_fastopen_context { siphash_key_t key[TCP_FASTOPEN_KEY_MAX]; int num; struct rcu_head rcu; }; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); /* Caller needs to wrap with rcu_read_(un)lock() */ static inline struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk) { struct tcp_fastopen_context *ctx; ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); if (!ctx) ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); return ctx; } static inline bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc, const struct tcp_fastopen_cookie *orig) { if (orig->len == TCP_FASTOPEN_COOKIE_SIZE && orig->len == foc->len && !memcmp(orig->val, foc->val, foc->len)) return true; return false; } static inline int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx) { return ctx->num; } /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. 
*/ enum tcp_chrono { TCP_CHRONO_UNSPEC, TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */ TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */ TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */ __TCP_CHRONO_MAX, }; void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); /* This helper is needed, because skb->tcp_tsorted_anchor uses * the same memory storage than skb->destructor/_skb_refdst */ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb) { skb->destructor = NULL; skb->_skb_refdst = 0UL; } #define tcp_skb_tsorted_save(skb) { \ unsigned long _save = skb->_skb_refdst; \ skb->_skb_refdst = 0UL; #define tcp_skb_tsorted_restore(skb) \ skb->_skb_refdst = _save; \ } void tcp_write_queue_purge(struct sock *sk); static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) { return skb_rb_first(&sk->tcp_rtx_queue); } static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) { return skb_rb_last(&sk->tcp_rtx_queue); } static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); } #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(const struct sock *sk) { return skb_peek(&sk->sk_write_queue); } static inline bool tcp_skb_is_last(const struct sock *sk, const struct sk_buff *skb) { return skb_queue_is_last(&sk->sk_write_queue, skb); } /** * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue * @sk: socket * * Since the write queue can have a temporary empty skb in it, * we must not use "return skb_queue_empty(&sk->sk_write_queue)" */ static inline bool tcp_write_queue_empty(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return tp->write_seq == tp->snd_nxt; } static inline bool tcp_rtx_queue_empty(const struct sock *sk) { return RB_EMPTY_ROOT(&sk->tcp_rtx_queue); } static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk) { return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk); } static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) { __skb_queue_tail(&sk->sk_write_queue, skb); /* Queue it, remembering where we must start sending. */ if (sk->sk_write_queue.next == skb) tcp_chrono_start(sk, TCP_CHRONO_BUSY); } /* Insert new before skb on the write queue of sk. 
*/ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { __skb_queue_before(&sk->sk_write_queue, skb, new); } static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) { tcp_skb_tsorted_anchor_cleanup(skb); __skb_unlink(skb, &sk->sk_write_queue); } void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb); static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk) { tcp_skb_tsorted_anchor_cleanup(skb); rb_erase(&skb->rbnode, &sk->tcp_rtx_queue); } static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk) { list_del(&skb->tcp_tsorted_anchor); tcp_rtx_queue_unlink(skb, sk); tcp_wmem_free_skb(sk, skb); } static inline void tcp_write_collapse_fence(struct sock *sk) { struct sk_buff *skb = tcp_write_queue_tail(sk); if (skb) TCP_SKB_CB(skb)->eor = 1; } static inline void tcp_push_pending_frames(struct sock *sk) { if (tcp_send_head(sk)) { struct tcp_sock *tp = tcp_sk(sk); __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); } } /* Start sequence of the skb just after the highest skb with SACKed * bit, valid only if sacked_out > 0 or when the caller has ensured * validity by itself. */ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) { if (!tp->sacked_out) return tp->snd_una; if (tp->highest_sack == NULL) return tp->snd_nxt; return TCP_SKB_CB(tp->highest_sack)->seq; } static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) { tcp_sk(sk)->highest_sack = skb_rb_next(skb); } static inline struct sk_buff *tcp_highest_sack(struct sock *sk) { return tcp_sk(sk)->highest_sack; } static inline void tcp_highest_sack_reset(struct sock *sk) { tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk); } /* Called when old skb is about to be deleted and replaced by new skb */ static inline void tcp_highest_sack_replace(struct sock *sk, struct sk_buff *old, struct sk_buff *new) { if (old == tcp_highest_sack(sk)) tcp_sk(sk)->highest_sack = new; } /* This helper checks if socket has IP_TRANSPARENT set */ static inline bool inet_sk_transparent(const struct sock *sk) { switch (sk->sk_state) { case TCP_TIME_WAIT: return inet_twsk(sk)->tw_transparent; case TCP_NEW_SYN_RECV: return inet_rsk(inet_reqsk(sk))->no_srccheck; } return inet_test_bit(TRANSPARENT, sk); } /* Determines whether this is a thin stream (which may suffer from * increased latency). Used to trigger latency-reducing mechanisms. 
*/ static inline bool tcp_stream_is_thin(struct tcp_sock *tp) { return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); } /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, TCP_SEQ_STATE_ESTABLISHED, }; void *tcp_seq_start(struct seq_file *seq, loff_t *pos); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void tcp_seq_stop(struct seq_file *seq, void *v); struct tcp_seq_afinfo { sa_family_t family; }; struct tcp_iter_state { struct seq_net_private p; enum tcp_seq_states state; struct sock *syn_wait_sk; int bucket, offset, sbucket, num; loff_t last_pos; }; extern struct request_sock_ops tcp_request_sock_ops; extern struct request_sock_ops tcp6_request_sock_ops; void tcp_v4_destroy_sock(struct sock *sk); struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features); struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb); struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th); struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else static inline void tcp_gro_complete(struct sk_buff *skb) { } #endif void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); u32 val; val = READ_ONCE(tp->notsent_lowat); return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); #ifdef CONFIG_PROC_FS int tcp4_proc_init(void); void tcp4_proc_exit(void); #endif int tcp_rtx_synack(const struct sock *sk, struct request_sock *req); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); /* TCP af-specific functions */ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk, const struct sock *addr_sk); int (*calc_md5_hash)(char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); int (*md5_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); #endif #ifdef CONFIG_TCP_AO int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao, const struct sock *sk, const struct sk_buff *skb, const u8 *tkey, int hash_offset, u32 sne); #endif }; struct tcp_request_sock_ops { u16 mss_clamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk, const struct sock *addr_sk); int (*calc_md5_hash) (char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); #endif #ifdef CONFIG_TCP_AO struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, struct request_sock *req, int sndid, int rcvid); int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock 
*sk); int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt, struct request_sock *req, const struct sk_buff *skb, int hash_offset, u32 sne); #endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); #endif struct dst_entry *(*route_req)(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn); u32 (*init_seq)(const struct sk_buff *skb); u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb); }; extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; #if IS_ENABLED(CONFIG_IPV6) extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; #endif #ifdef CONFIG_SYN_COOKIES static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, __u16 *mss) { tcp_synq_overflow(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return ops->cookie_init_seq(skb, mss); } #else static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, __u16 *mss) { return 0; } #endif struct tcp_key { union { struct { struct tcp_ao_key *ao_key; char *traffic_key; u32 sne; u8 rcv_next; }; struct tcp_md5sig_key *md5_key; }; enum { TCP_KEY_NONE = 0, TCP_KEY_MD5, TCP_KEY_AO, } type; }; static inline void tcp_get_current_key(const struct sock *sk, struct tcp_key *out) { #if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) const struct tcp_sock *tp = tcp_sk(sk); #endif #ifdef CONFIG_TCP_AO if (static_branch_unlikely(&tcp_ao_needed.key)) { struct tcp_ao_info *ao; ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held(sk)); if (ao) { out->ao_key = READ_ONCE(ao->current_key); out->type = TCP_KEY_AO; return; } } #endif #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key) && rcu_access_pointer(tp->md5sig_info)) { out->md5_key = tp->af_specific->md5_lookup(sk, sk); if (out->md5_key) { out->type = TCP_KEY_MD5; return; } } #endif out->type = TCP_KEY_NONE; } static inline bool tcp_key_is_md5(const struct tcp_key *key) { if (static_branch_tcp_md5()) return key->type == TCP_KEY_MD5; return false; } static inline bool tcp_key_is_ao(const struct tcp_key *key) { if (static_branch_tcp_ao()) return key->type == TCP_KEY_AO; return false; } int tcpv4_offload_init(void); void tcp_v4_init(void); void tcp_init(void); /* tcp_recovery.c */ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); extern bool tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* tcp_plb.c */ /* * Scaling factor for fractions in PLB. For example, tcp_plb_update_state * expects cong_ratio which represents fraction of traffic that experienced * congestion over a single RTT. In order to avoid floating point operations, * this fraction should be mapped to (1 << TCP_PLB_SCALE) and passed in. */ #define TCP_PLB_SCALE 8 /* State for PLB (Protective Load Balancing) for a single TCP connection. 
*/ struct tcp_plb_state { u8 consec_cong_rounds:5, /* consecutive congested rounds */ unused:3; u32 pause_until; /* jiffies32 when PLB can resume rerouting */ }; static inline void tcp_plb_init(const struct sock *sk, struct tcp_plb_state *plb) { plb->consec_cong_rounds = 0; plb->pause_until = 0; } void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, const int cong_ratio); void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb); void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb); static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str) { WARN_ONCE(cond, "%scwn:%u out:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n", str, tcp_snd_cwnd(tcp_sk(sk)), tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out, tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out, tcp_sk(sk)->tlp_high_seq, sk->sk_state, inet_csk(sk)->icsk_ca_state, tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache, inet_csk(sk)->icsk_pmtu_cookie); } /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; if (likely(skb)) { u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } else { tcp_warn_once(sk, 1, "rtx queue empty: "); return jiffies_to_usecs(rto); } } /* * Save and compile IPv4 options, return a pointer to it */ static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net, struct sk_buff *skb) { const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct ip_options_rcu *dopt = NULL; if (opt->optlen) { int opt_size = sizeof(*dopt) + opt->optlen; dopt = kmalloc(opt_size, GFP_ATOMIC); if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { kfree(dopt); dopt = NULL; } } return dopt; } /* locally generated TCP pure ACKs have skb->truesize == 2 * (check tcp_send_ack() in net/ipv4/tcp_output.c ) * This is much faster than dissecting the packet to find out. * (Think of GRE encapsulations, IPv4, IPv6, ...) */ static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb) { return skb->truesize == 2; } static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) { skb->truesize = 2; } static inline int tcp_inq(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int answ; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { answ = 0; } else if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { answ = tp->rcv_nxt - tp->copied_seq; /* Subtract 1, if FIN was received */ if (answ && sock_flag(sk, SOCK_DONE)) answ--; } else { answ = tp->urg_seq - tp->copied_seq; } return answ; } int tcp_peek_len(struct socket *sock); static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) { u16 segs_in; segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); /* We update these fields while other threads might * read them from tcp_get_info() */ WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in); if (skb->len > tcp_hdrlen(skb)) WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in); } /* * TCP listen path runs lockless. * We forced "struct sock" to be const qualified to make sure * we don't modify one of its field by mistake. * Here, we increment sk_drops which is an atomic_t, so we can safely * make sock writable again. 
*/ static inline void tcp_listendrop(const struct sock *sk) { atomic_inc(&((struct sock *)sk)->sk_drops); __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); /* * Interface for adding Upper Level Protocols over TCP */ #define TCP_ULP_NAME_MAX 16 #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) struct tcp_ulp_ops { struct list_head list; /* initialize ulp */ int (*init)(struct sock *sk); /* update ulp */ void (*update)(struct sock *sk, struct proto *p, void (*write_space)(struct sock *sk)); /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, const gfp_t priority); char name[TCP_ULP_NAME_MAX]; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); void tcp_update_ulp(struct sock *sk, struct proto *p, void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) #ifdef CONFIG_NET_SOCK_MSG struct sk_msg; struct sk_psock; #ifdef CONFIG_BPF_SYSCALL int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); #endif /* CONFIG_BPF_SYSCALL */ #ifdef CONFIG_INET void tcp_eat_skb(struct sock *sk, struct sk_buff *skb); #else static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb) { } #endif int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, struct sk_msg *msg, u32 bytes, int flags); #endif /* CONFIG_NET_SOCK_MSG */ #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG) static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) { } #endif #ifdef CONFIG_CGROUP_BPF static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, struct sk_buff *skb, unsigned int end_offset) { skops->skb = skb; skops->skb_data_end = skb->data + end_offset; } #else static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, struct sk_buff *skb, unsigned int end_offset) { } #endif /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF * program loaded). 
*/ #ifdef CONFIG_BPF static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { struct bpf_sock_ops_kern sock_ops; int ret; memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp)); if (sk_fullsock(sk)) { sock_ops.is_fullsock = 1; sock_owned_by_me(sk); } sock_ops.sk = sk; sock_ops.op = op; if (nargs > 0) memcpy(sock_ops.args, args, nargs * sizeof(*args)); ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); if (ret == 0) ret = sock_ops.reply; else ret = -1; return ret; } static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) { u32 args[2] = {arg1, arg2}; return tcp_call_bpf(sk, op, 2, args); } static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, u32 arg3) { u32 args[3] = {arg1, arg2, arg3}; return tcp_call_bpf(sk, op, 3, args); } #else static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { return -EPERM; } static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2) { return -EPERM; } static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2, u32 arg3) { return -EPERM; } #endif static inline u32 tcp_timeout_init(struct sock *sk) { int timeout; timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL); if (timeout <= 0) timeout = TCP_TIMEOUT_INIT; return min_t(int, timeout, TCP_RTO_MAX); } static inline u32 tcp_rwnd_init_bpf(struct sock *sk) { int rwnd; rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL); if (rwnd < 0) rwnd = 0; return rwnd; } static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) { return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt) { if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt); } #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif #if IS_ENABLED(CONFIG_TLS_DEVICE) void clean_acked_data_enable(struct inet_connection_sock *icsk, void (*cad)(struct sock *sk, u32 ack_seq)); void clean_acked_data_disable(struct inet_connection_sock *icsk); void clean_acked_data_flush(void); #endif DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); static inline void tcp_add_tx_delay(struct sk_buff *skb, const struct tcp_sock *tp) { if (static_branch_unlikely(&tcp_tx_delay_enabled)) skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC; } /* Compute Earliest Departure Time for some control packets * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets. */ static inline u64 tcp_transmit_time(const struct sock *sk) { if (static_branch_unlikely(&tcp_tx_delay_enabled)) { u32 delay = (sk->sk_state == TCP_TIME_WAIT) ? 
tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay; return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC; } return 0; } static inline int tcp_parse_auth_options(const struct tcphdr *th, const u8 **md5_hash, const struct tcp_ao_hdr **aoh) { const u8 *md5_tmp, *ao_tmp; int ret; ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp); if (ret) return ret; if (md5_hash) *md5_hash = md5_tmp; if (aoh) { if (!ao_tmp) *aoh = NULL; else *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2); } return 0; } static inline bool tcp_ao_required(struct sock *sk, const void *saddr, int family, int l3index, bool stat_inc) { #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; struct tcp_ao_key *ao_key; if (!static_branch_unlikely(&tcp_ao_needed.key)) return false; ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk)); if (!ao_info) return false; ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1); if (ao_info->ao_required || ao_key) { if (stat_inc) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); atomic64_inc(&ao_info->counters.ao_required); } return true; } #endif return false; } enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const struct request_sock *req, const struct sk_buff *skb, const void *saddr, const void *daddr, int family, int dif, int sdif); #endif /* _TCP_H */ |
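/*
 * Illustrative sketch (editor's addition, not part of the kernel sources):
 * a minimal congestion-control module built on the tcp_congestion_ops table
 * and the registration helpers declared above. It only fills in the three
 * required callbacks (ssthresh, cong_avoid, undo_cwnd), backing them with the
 * exported Reno helpers; the module name "tcp_example" and the algorithm name
 * "example" are hypothetical. Once loaded, the algorithm could be selected
 * per-socket with tcp_set_congestion_control() / the TCP_CONGESTION sockopt.
 */
#include <linux/module.h>
#include <net/tcp.h>

static struct tcp_congestion_ops tcp_example __read_mostly = {
	/* required callbacks, reusing the Reno implementations from tcp_cong.c */
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.owner		= THIS_MODULE,
	.name		= "example",	/* must fit within TCP_CA_NAME_MAX */
};

static int __init tcp_example_register(void)
{
	/* makes the algorithm visible to tcp_ca_find()/setsockopt lookups */
	return tcp_register_congestion_control(&tcp_example);
}

static void __exit tcp_example_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_example);
}

module_init(tcp_example_register);
module_exit(tcp_example_unregister);
MODULE_LICENSE("GPL");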
// SPDX-License-Identifier: GPL-2.0-only /* * vivid-ctrls.c - control support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
*/ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/videodev2.h> #include <media/v4l2-event.h> #include <media/v4l2-common.h> #include "vivid-core.h" #include "vivid-vid-cap.h" #include "vivid-vid-out.h" #include "vivid-vid-common.h" #include "vivid-radio-common.h" #include "vivid-osd.h" #include "vivid-ctrls.h" #define VIVID_CID_CUSTOM_BASE (V4L2_CID_USER_BASE | 0xf000) #define VIVID_CID_BUTTON (VIVID_CID_CUSTOM_BASE + 0) #define VIVID_CID_BOOLEAN (VIVID_CID_CUSTOM_BASE + 1) #define VIVID_CID_INTEGER (VIVID_CID_CUSTOM_BASE + 2) #define VIVID_CID_INTEGER64 (VIVID_CID_CUSTOM_BASE + 3) #define VIVID_CID_MENU (VIVID_CID_CUSTOM_BASE + 4) #define VIVID_CID_STRING (VIVID_CID_CUSTOM_BASE + 5) #define VIVID_CID_BITMASK (VIVID_CID_CUSTOM_BASE + 6) #define VIVID_CID_INTMENU (VIVID_CID_CUSTOM_BASE + 7) #define VIVID_CID_U32_ARRAY (VIVID_CID_CUSTOM_BASE + 8) #define VIVID_CID_U16_MATRIX (VIVID_CID_CUSTOM_BASE + 9) #define VIVID_CID_U8_4D_ARRAY (VIVID_CID_CUSTOM_BASE + 10) #define VIVID_CID_AREA (VIVID_CID_CUSTOM_BASE + 11) #define VIVID_CID_RO_INTEGER (VIVID_CID_CUSTOM_BASE + 12) #define VIVID_CID_U32_DYN_ARRAY (VIVID_CID_CUSTOM_BASE + 13) #define VIVID_CID_U8_PIXEL_ARRAY (VIVID_CID_CUSTOM_BASE + 14) #define VIVID_CID_S32_ARRAY (VIVID_CID_CUSTOM_BASE + 15) #define VIVID_CID_S64_ARRAY (VIVID_CID_CUSTOM_BASE + 16) #define VIVID_CID_VIVID_BASE (0x00f00000 | 0xf000) #define VIVID_CID_VIVID_CLASS (0x00f00000 | 1) #define VIVID_CID_TEST_PATTERN (VIVID_CID_VIVID_BASE + 0) #define VIVID_CID_OSD_TEXT_MODE (VIVID_CID_VIVID_BASE + 1) #define VIVID_CID_HOR_MOVEMENT (VIVID_CID_VIVID_BASE + 2) #define VIVID_CID_VERT_MOVEMENT (VIVID_CID_VIVID_BASE + 3) #define VIVID_CID_SHOW_BORDER (VIVID_CID_VIVID_BASE + 4) #define VIVID_CID_SHOW_SQUARE (VIVID_CID_VIVID_BASE + 5) #define VIVID_CID_INSERT_SAV (VIVID_CID_VIVID_BASE + 6) #define VIVID_CID_INSERT_EAV (VIVID_CID_VIVID_BASE + 7) #define VIVID_CID_VBI_CAP_INTERLACED (VIVID_CID_VIVID_BASE + 8) #define VIVID_CID_INSERT_HDMI_VIDEO_GUARD_BAND (VIVID_CID_VIVID_BASE + 9) #define VIVID_CID_HFLIP (VIVID_CID_VIVID_BASE + 20) #define VIVID_CID_VFLIP (VIVID_CID_VIVID_BASE + 21) #define VIVID_CID_STD_ASPECT_RATIO (VIVID_CID_VIVID_BASE + 22) #define VIVID_CID_DV_TIMINGS_ASPECT_RATIO (VIVID_CID_VIVID_BASE + 23) #define VIVID_CID_TSTAMP_SRC (VIVID_CID_VIVID_BASE + 24) #define VIVID_CID_COLORSPACE (VIVID_CID_VIVID_BASE + 25) #define VIVID_CID_XFER_FUNC (VIVID_CID_VIVID_BASE + 26) #define VIVID_CID_YCBCR_ENC (VIVID_CID_VIVID_BASE + 27) #define VIVID_CID_QUANTIZATION (VIVID_CID_VIVID_BASE + 28) #define VIVID_CID_LIMITED_RGB_RANGE (VIVID_CID_VIVID_BASE + 29) #define VIVID_CID_ALPHA_MODE (VIVID_CID_VIVID_BASE + 30) #define VIVID_CID_HAS_CROP_CAP (VIVID_CID_VIVID_BASE + 31) #define VIVID_CID_HAS_COMPOSE_CAP (VIVID_CID_VIVID_BASE + 32) #define VIVID_CID_HAS_SCALER_CAP (VIVID_CID_VIVID_BASE + 33) #define VIVID_CID_HAS_CROP_OUT (VIVID_CID_VIVID_BASE + 34) #define VIVID_CID_HAS_COMPOSE_OUT (VIVID_CID_VIVID_BASE + 35) #define VIVID_CID_HAS_SCALER_OUT (VIVID_CID_VIVID_BASE + 36) #define VIVID_CID_SEQ_WRAP (VIVID_CID_VIVID_BASE + 38) #define VIVID_CID_TIME_WRAP (VIVID_CID_VIVID_BASE + 39) #define VIVID_CID_MAX_EDID_BLOCKS (VIVID_CID_VIVID_BASE + 40) #define VIVID_CID_PERCENTAGE_FILL (VIVID_CID_VIVID_BASE + 41) #define VIVID_CID_REDUCED_FPS (VIVID_CID_VIVID_BASE + 42) #define VIVID_CID_HSV_ENC (VIVID_CID_VIVID_BASE + 43) #define VIVID_CID_STD_SIGNAL_MODE (VIVID_CID_VIVID_BASE + 60) #define VIVID_CID_STANDARD (VIVID_CID_VIVID_BASE + 61) #define VIVID_CID_DV_TIMINGS_SIGNAL_MODE 
(VIVID_CID_VIVID_BASE + 62) #define VIVID_CID_DV_TIMINGS (VIVID_CID_VIVID_BASE + 63) #define VIVID_CID_PERC_DROPPED (VIVID_CID_VIVID_BASE + 64) #define VIVID_CID_DISCONNECT (VIVID_CID_VIVID_BASE + 65) #define VIVID_CID_DQBUF_ERROR (VIVID_CID_VIVID_BASE + 66) #define VIVID_CID_QUEUE_SETUP_ERROR (VIVID_CID_VIVID_BASE + 67) #define VIVID_CID_BUF_PREPARE_ERROR (VIVID_CID_VIVID_BASE + 68) #define VIVID_CID_START_STR_ERROR (VIVID_CID_VIVID_BASE + 69) #define VIVID_CID_QUEUE_ERROR (VIVID_CID_VIVID_BASE + 70) #define VIVID_CID_CLEAR_FB (VIVID_CID_VIVID_BASE + 71) #define VIVID_CID_REQ_VALIDATE_ERROR (VIVID_CID_VIVID_BASE + 72) #define VIVID_CID_RADIO_SEEK_MODE (VIVID_CID_VIVID_BASE + 90) #define VIVID_CID_RADIO_SEEK_PROG_LIM (VIVID_CID_VIVID_BASE + 91) #define VIVID_CID_RADIO_RX_RDS_RBDS (VIVID_CID_VIVID_BASE + 92) #define VIVID_CID_RADIO_RX_RDS_BLOCKIO (VIVID_CID_VIVID_BASE + 93) #define VIVID_CID_RADIO_TX_RDS_BLOCKIO (VIVID_CID_VIVID_BASE + 94) #define VIVID_CID_SDR_CAP_FM_DEVIATION (VIVID_CID_VIVID_BASE + 110) #define VIVID_CID_META_CAP_GENERATE_PTS (VIVID_CID_VIVID_BASE + 111) #define VIVID_CID_META_CAP_GENERATE_SCR (VIVID_CID_VIVID_BASE + 112) /* HDMI inputs are in the range 0-14. The next available CID is VIVID_CID_VIVID_BASE + 128 */ #define VIVID_CID_HDMI_IS_CONNECTED_TO_OUTPUT(input) (VIVID_CID_VIVID_BASE + 113 + (input)) /* S-Video inputs are in the range 0-15. The next available CID is VIVID_CID_VIVID_BASE + 144 */ #define VIVID_CID_SVID_IS_CONNECTED_TO_OUTPUT(input) (VIVID_CID_VIVID_BASE + 128 + (input)) /* General User Controls */ static void vivid_unregister_dev(bool valid, struct video_device *vdev) { if (!valid) return; clear_bit(V4L2_FL_REGISTERED, &vdev->flags); v4l2_event_wake_all(vdev); } static int vivid_user_gen_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_gen); switch (ctrl->id) { case VIVID_CID_DISCONNECT: v4l2_info(&dev->v4l2_dev, "disconnect\n"); dev->disconnect_error = true; vivid_unregister_dev(dev->has_vid_cap, &dev->vid_cap_dev); vivid_unregister_dev(dev->has_vid_out, &dev->vid_out_dev); vivid_unregister_dev(dev->has_vbi_cap, &dev->vbi_cap_dev); vivid_unregister_dev(dev->has_vbi_out, &dev->vbi_out_dev); vivid_unregister_dev(dev->has_radio_rx, &dev->radio_rx_dev); vivid_unregister_dev(dev->has_radio_tx, &dev->radio_tx_dev); vivid_unregister_dev(dev->has_sdr_cap, &dev->sdr_cap_dev); vivid_unregister_dev(dev->has_meta_cap, &dev->meta_cap_dev); vivid_unregister_dev(dev->has_meta_out, &dev->meta_out_dev); vivid_unregister_dev(dev->has_touch_cap, &dev->touch_cap_dev); break; case VIVID_CID_BUTTON: dev->button_pressed = 30; break; } return 0; } static const struct v4l2_ctrl_ops vivid_user_gen_ctrl_ops = { .s_ctrl = vivid_user_gen_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_button = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BUTTON, .name = "Button", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_boolean = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BOOLEAN, .name = "Boolean", .type = V4L2_CTRL_TYPE_BOOLEAN, .min = 0, .max = 1, .step = 1, .def = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_int32 = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTEGER, .name = "Integer 32 Bits", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0xffffffff80000000ULL, .max = 0x7fffffff, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_int64 = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTEGER64, .name = "Integer 64 Bits", .type = 
V4L2_CTRL_TYPE_INTEGER64, .min = 0x8000000000000000ULL, .max = 0x7fffffffffffffffLL, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_u32_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U32_ARRAY, .name = "U32 1 Element Array", .type = V4L2_CTRL_TYPE_U32, .def = 0x18, .min = 0x10, .max = 0x20000, .step = 1, .dims = { 1 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u32_dyn_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U32_DYN_ARRAY, .name = "U32 Dynamic Array", .type = V4L2_CTRL_TYPE_U32, .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, .def = 50, .min = 10, .max = 90, .step = 1, .dims = { 100 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u16_matrix = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U16_MATRIX, .name = "U16 8x16 Matrix", .type = V4L2_CTRL_TYPE_U16, .def = 0x18, .min = 0x10, .max = 0x2000, .step = 1, .dims = { 8, 16 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u8_4d_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U8_4D_ARRAY, .name = "U8 2x3x4x5 Array", .type = V4L2_CTRL_TYPE_U8, .def = 0x18, .min = 0x10, .max = 0x20, .step = 1, .dims = { 2, 3, 4, 5 }, }; static const struct v4l2_ctrl_config vivid_ctrl_u8_pixel_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_U8_PIXEL_ARRAY, .name = "U8 Pixel Array", .type = V4L2_CTRL_TYPE_U8, .def = 0x80, .min = 0x00, .max = 0xff, .step = 1, .dims = { 640 / PIXEL_ARRAY_DIV, 360 / PIXEL_ARRAY_DIV }, }; static const struct v4l2_ctrl_config vivid_ctrl_s32_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_S32_ARRAY, .name = "S32 2 Element Array", .type = V4L2_CTRL_TYPE_INTEGER, .def = 2, .min = -10, .max = 10, .step = 1, .dims = { 2 }, }; static const struct v4l2_ctrl_config vivid_ctrl_s64_array = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_S64_ARRAY, .name = "S64 5 Element Array", .type = V4L2_CTRL_TYPE_INTEGER64, .def = 4, .min = -10, .max = 10, .step = 1, .dims = { 5 }, }; static const char * const vivid_ctrl_menu_strings[] = { "Menu Item 0 (Skipped)", "Menu Item 1", "Menu Item 2 (Skipped)", "Menu Item 3", "Menu Item 4", "Menu Item 5 (Skipped)", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_menu = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_MENU, .name = "Menu", .type = V4L2_CTRL_TYPE_MENU, .min = 1, .max = 4, .def = 3, .menu_skip_mask = 0x04, .qmenu = vivid_ctrl_menu_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_string = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_STRING, .name = "String", .type = V4L2_CTRL_TYPE_STRING, .min = 2, .max = 4, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_bitmask = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_BITMASK, .name = "Bitmask", .type = V4L2_CTRL_TYPE_BITMASK, .def = 0x80002000, .min = 0, .max = 0x80402010, .step = 0, }; static const s64 vivid_ctrl_int_menu_values[] = { 1, 1, 2, 3, 5, 8, 13, 21, 42, }; static const struct v4l2_ctrl_config vivid_ctrl_int_menu = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_INTMENU, .name = "Integer Menu", .type = V4L2_CTRL_TYPE_INTEGER_MENU, .min = 1, .max = 8, .def = 4, .menu_skip_mask = 0x02, .qmenu_int = vivid_ctrl_int_menu_values, }; static const struct v4l2_ctrl_config vivid_ctrl_disconnect = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_DISCONNECT, .name = "Disconnect", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_area area = { .width = 1000, .height = 2000, }; static const struct v4l2_ctrl_config vivid_ctrl_area = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_AREA, .name = "Area", .type 
= V4L2_CTRL_TYPE_AREA, .p_def.p_const = &area, }; static const struct v4l2_ctrl_config vivid_ctrl_ro_int32 = { .ops = &vivid_user_gen_ctrl_ops, .id = VIVID_CID_RO_INTEGER, .name = "Read-Only Integer 32 Bits", .type = V4L2_CTRL_TYPE_INTEGER, .flags = V4L2_CTRL_FLAG_READ_ONLY, .min = 0, .max = 255, .step = 1, }; /* Framebuffer Controls */ static int vivid_fb_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_fb); switch (ctrl->id) { case VIVID_CID_CLEAR_FB: vivid_clear_fb(dev); break; } return 0; } static const struct v4l2_ctrl_ops vivid_fb_ctrl_ops = { .s_ctrl = vivid_fb_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_clear_fb = { .ops = &vivid_fb_ctrl_ops, .id = VIVID_CID_CLEAR_FB, .name = "Clear Framebuffer", .type = V4L2_CTRL_TYPE_BUTTON, }; /* Video User Controls */ static int vivid_user_vid_g_volatile_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_vid); switch (ctrl->id) { case V4L2_CID_AUTOGAIN: dev->gain->val = (jiffies_to_msecs(jiffies) / 1000) & 0xff; break; } return 0; } static int vivid_user_vid_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_user_vid); switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: dev->input_brightness[dev->input] = ctrl->val - dev->input * 128; tpg_s_brightness(&dev->tpg, dev->input_brightness[dev->input]); break; case V4L2_CID_CONTRAST: tpg_s_contrast(&dev->tpg, ctrl->val); break; case V4L2_CID_SATURATION: tpg_s_saturation(&dev->tpg, ctrl->val); break; case V4L2_CID_HUE: tpg_s_hue(&dev->tpg, ctrl->val); break; case V4L2_CID_HFLIP: dev->hflip = ctrl->val; tpg_s_hflip(&dev->tpg, dev->sensor_hflip ^ dev->hflip); break; case V4L2_CID_VFLIP: dev->vflip = ctrl->val; tpg_s_vflip(&dev->tpg, dev->sensor_vflip ^ dev->vflip); break; case V4L2_CID_ALPHA_COMPONENT: tpg_s_alpha_component(&dev->tpg, ctrl->val); break; } return 0; } static const struct v4l2_ctrl_ops vivid_user_vid_ctrl_ops = { .g_volatile_ctrl = vivid_user_vid_g_volatile_ctrl, .s_ctrl = vivid_user_vid_s_ctrl, }; /* Video Capture Controls */ static void vivid_update_power_present(struct vivid_dev *dev) { unsigned int i, j; dev->power_present = 0; for (i = 0, j = 0; i < ARRAY_SIZE(dev->dv_timings_signal_mode); i++) { if (dev->input_type[i] != HDMI) continue; /* * If connected to TPG or HDMI output, and the signal * mode is not NO_SIGNAL, then there is power present. 
*/ if (dev->input_is_connected_to_output[i] != 1 && dev->dv_timings_signal_mode[i] != NO_SIGNAL) dev->power_present |= (1 << j); j++; } __v4l2_ctrl_s_ctrl(dev->ctrl_rx_power_present, dev->power_present); v4l2_ctrl_activate(dev->ctrl_dv_timings, dev->dv_timings_signal_mode[dev->input] == SELECTED_DV_TIMINGS); } static int vivid_vid_cap_s_ctrl(struct v4l2_ctrl *ctrl) { static const u32 colorspaces[] = { V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_REC709, V4L2_COLORSPACE_SRGB, V4L2_COLORSPACE_OPRGB, V4L2_COLORSPACE_BT2020, V4L2_COLORSPACE_DCI_P3, V4L2_COLORSPACE_SMPTE240M, V4L2_COLORSPACE_470_SYSTEM_M, V4L2_COLORSPACE_470_SYSTEM_BG, }; struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vid_cap); unsigned int i; struct vivid_dev *output_inst = NULL; int index = 0; int hdmi_index, svid_index; s32 input_index = 0; switch (ctrl->id) { case VIVID_CID_TEST_PATTERN: vivid_update_quality(dev); tpg_s_pattern(&dev->tpg, ctrl->val); break; case VIVID_CID_COLORSPACE: tpg_s_colorspace(&dev->tpg, colorspaces[ctrl->val]); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_XFER_FUNC: tpg_s_xfer_func(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_YCBCR_ENC: tpg_s_ycbcr_enc(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_HSV_ENC: tpg_s_hsv_enc(&dev->tpg, ctrl->val ? V4L2_HSV_ENC_256 : V4L2_HSV_ENC_180); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case VIVID_CID_QUANTIZATION: tpg_s_quantization(&dev->tpg, ctrl->val); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); vivid_send_source_change(dev, HDMI); vivid_send_source_change(dev, WEBCAM); break; case V4L2_CID_DV_RX_RGB_RANGE: if (!vivid_is_hdmi_cap(dev)) break; tpg_s_rgb_range(&dev->tpg, ctrl->val); break; case VIVID_CID_LIMITED_RGB_RANGE: tpg_s_real_rgb_range(&dev->tpg, ctrl->val ? 
V4L2_DV_RGB_RANGE_LIMITED : V4L2_DV_RGB_RANGE_FULL); break; case VIVID_CID_ALPHA_MODE: tpg_s_alpha_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_HOR_MOVEMENT: tpg_s_mv_hor_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_VERT_MOVEMENT: tpg_s_mv_vert_mode(&dev->tpg, ctrl->val); break; case VIVID_CID_OSD_TEXT_MODE: dev->osd_mode = ctrl->val; break; case VIVID_CID_PERCENTAGE_FILL: tpg_s_perc_fill(&dev->tpg, ctrl->val); for (i = 0; i < MAX_VID_CAP_BUFFERS; i++) dev->must_blank[i] = ctrl->val < 100; break; case VIVID_CID_INSERT_SAV: tpg_s_insert_sav(&dev->tpg, ctrl->val); break; case VIVID_CID_INSERT_EAV: tpg_s_insert_eav(&dev->tpg, ctrl->val); break; case VIVID_CID_INSERT_HDMI_VIDEO_GUARD_BAND: tpg_s_insert_hdmi_video_guard_band(&dev->tpg, ctrl->val); break; case VIVID_CID_HFLIP: dev->sensor_hflip = ctrl->val; tpg_s_hflip(&dev->tpg, dev->sensor_hflip ^ dev->hflip); break; case VIVID_CID_VFLIP: dev->sensor_vflip = ctrl->val; tpg_s_vflip(&dev->tpg, dev->sensor_vflip ^ dev->vflip); break; case VIVID_CID_REDUCED_FPS: dev->reduced_fps = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_CROP_CAP: dev->has_crop_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_COMPOSE_CAP: dev->has_compose_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_HAS_SCALER_CAP: dev->has_scaler_cap = ctrl->val; vivid_update_format_cap(dev, true); break; case VIVID_CID_SHOW_BORDER: tpg_s_show_border(&dev->tpg, ctrl->val); break; case VIVID_CID_SHOW_SQUARE: tpg_s_show_square(&dev->tpg, ctrl->val); break; case VIVID_CID_STD_ASPECT_RATIO: dev->std_aspect_ratio[dev->input] = ctrl->val; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); break; case VIVID_CID_DV_TIMINGS_SIGNAL_MODE: dev->dv_timings_signal_mode[dev->input] = dev->ctrl_dv_timings_signal_mode->val; dev->query_dv_timings[dev->input] = dev->ctrl_dv_timings->val; vivid_update_power_present(dev); vivid_update_quality(dev); vivid_send_input_source_change(dev, dev->input); break; case VIVID_CID_DV_TIMINGS_ASPECT_RATIO: dev->dv_timings_aspect_ratio[dev->input] = ctrl->val; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); break; case VIVID_CID_TSTAMP_SRC: dev->tstamp_src_is_soe = ctrl->val; dev->vb_vid_cap_q.timestamp_flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; if (dev->tstamp_src_is_soe) dev->vb_vid_cap_q.timestamp_flags |= V4L2_BUF_FLAG_TSTAMP_SRC_SOE; break; case VIVID_CID_MAX_EDID_BLOCKS: dev->edid_max_blocks = ctrl->val; if (dev->edid_blocks > dev->edid_max_blocks) dev->edid_blocks = dev->edid_max_blocks; break; case VIVID_CID_HDMI_IS_CONNECTED_TO_OUTPUT(0) ... 
VIVID_CID_HDMI_IS_CONNECTED_TO_OUTPUT(14): hdmi_index = ctrl->id - VIVID_CID_HDMI_IS_CONNECTED_TO_OUTPUT(0); output_inst = vivid_ctrl_hdmi_to_output_instance[ctrl->cur.val]; index = vivid_ctrl_hdmi_to_output_index[ctrl->cur.val]; input_index = dev->hdmi_index_to_input_index[hdmi_index]; dev->input_is_connected_to_output[input_index] = ctrl->val; if (output_inst) { output_inst->output_to_input_instance[index] = NULL; vivid_update_outputs(output_inst); cec_phys_addr_invalidate(output_inst->cec_tx_adap[index]); } if (ctrl->val >= FIXED_MENU_ITEMS) { output_inst = vivid_ctrl_hdmi_to_output_instance[ctrl->val]; index = vivid_ctrl_hdmi_to_output_index[ctrl->val]; output_inst->output_to_input_instance[index] = dev; output_inst->output_to_input_index[index] = dev->hdmi_index_to_input_index[hdmi_index]; } spin_lock(&hdmi_output_skip_mask_lock); hdmi_to_output_menu_skip_mask &= ~(1ULL << ctrl->cur.val); if (ctrl->val >= FIXED_MENU_ITEMS) hdmi_to_output_menu_skip_mask |= 1ULL << ctrl->val; spin_unlock(&hdmi_output_skip_mask_lock); vivid_update_power_present(dev); vivid_update_quality(dev); vivid_send_input_source_change(dev, dev->hdmi_index_to_input_index[hdmi_index]); if (ctrl->val < FIXED_MENU_ITEMS && ctrl->cur.val < FIXED_MENU_ITEMS) break; spin_lock(&hdmi_output_skip_mask_lock); hdmi_input_update_outputs_mask |= 1 << dev->inst; spin_unlock(&hdmi_output_skip_mask_lock); queue_work(update_hdmi_ctrls_workqueue, &dev->update_hdmi_ctrl_work); break; case VIVID_CID_SVID_IS_CONNECTED_TO_OUTPUT(0) ... VIVID_CID_SVID_IS_CONNECTED_TO_OUTPUT(15): svid_index = ctrl->id - VIVID_CID_SVID_IS_CONNECTED_TO_OUTPUT(0); output_inst = vivid_ctrl_svid_to_output_instance[ctrl->cur.val]; index = vivid_ctrl_svid_to_output_index[ctrl->cur.val]; input_index = dev->svid_index_to_input_index[svid_index]; dev->input_is_connected_to_output[input_index] = ctrl->val; if (output_inst) output_inst->output_to_input_instance[index] = NULL; if (ctrl->val >= FIXED_MENU_ITEMS) { output_inst = vivid_ctrl_svid_to_output_instance[ctrl->val]; index = vivid_ctrl_svid_to_output_index[ctrl->val]; output_inst->output_to_input_instance[index] = dev; output_inst->output_to_input_index[index] = dev->svid_index_to_input_index[svid_index]; } spin_lock(&svid_output_skip_mask_lock); svid_to_output_menu_skip_mask &= ~(1ULL << ctrl->cur.val); if (ctrl->val >= FIXED_MENU_ITEMS) svid_to_output_menu_skip_mask |= 1ULL << ctrl->val; spin_unlock(&svid_output_skip_mask_lock); vivid_update_quality(dev); vivid_send_input_source_change(dev, dev->svid_index_to_input_index[svid_index]); if (ctrl->val < FIXED_MENU_ITEMS && ctrl->cur.val < FIXED_MENU_ITEMS) break; queue_work(update_svid_ctrls_workqueue, &dev->update_svid_ctrl_work); break; } return 0; } static const struct v4l2_ctrl_ops vivid_vid_cap_ctrl_ops = { .s_ctrl = vivid_vid_cap_s_ctrl, }; static const char * const vivid_ctrl_hor_movement_strings[] = { "Move Left Fast", "Move Left", "Move Left Slow", "No Movement", "Move Right Slow", "Move Right", "Move Right Fast", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_hor_movement = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HOR_MOVEMENT, .name = "Horizontal Movement", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_MOVE_POS_FAST, .def = TPG_MOVE_NONE, .qmenu = vivid_ctrl_hor_movement_strings, }; static const char * const vivid_ctrl_vert_movement_strings[] = { "Move Up Fast", "Move Up", "Move Up Slow", "No Movement", "Move Down Slow", "Move Down", "Move Down Fast", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_vert_movement = { .ops = 
&vivid_vid_cap_ctrl_ops, .id = VIVID_CID_VERT_MOVEMENT, .name = "Vertical Movement", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_MOVE_POS_FAST, .def = TPG_MOVE_NONE, .qmenu = vivid_ctrl_vert_movement_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_show_border = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_SHOW_BORDER, .name = "Show Border", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_show_square = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_SHOW_SQUARE, .name = "Show Square", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const char * const vivid_ctrl_osd_mode_strings[] = { "All", "Counters Only", "None", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_osd_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_OSD_TEXT_MODE, .name = "OSD Text Mode", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_osd_mode_strings) - 2, .qmenu = vivid_ctrl_osd_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_perc_fill = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_PERCENTAGE_FILL, .name = "Fill Percentage of Frame", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0, .max = 100, .def = 100, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_insert_sav = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_INSERT_SAV, .name = "Insert SAV Code in Image", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_insert_eav = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_INSERT_EAV, .name = "Insert EAV Code in Image", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_insert_hdmi_video_guard_band = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_INSERT_HDMI_VIDEO_GUARD_BAND, .name = "Insert Video Guard Band", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_hflip = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HFLIP, .name = "Sensor Flipped Horizontally", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_vflip = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_VFLIP, .name = "Sensor Flipped Vertically", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_reduced_fps = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_REDUCED_FPS, .name = "Reduced Framerate", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_crop_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_CROP_CAP, .name = "Enable Capture Cropping", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_compose_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_COMPOSE_CAP, .name = "Enable Capture Composing", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_scaler_cap = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HAS_SCALER_CAP, .name = "Enable Capture Scaler", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const char * const vivid_ctrl_tstamp_src_strings[] = { "End of Frame", "Start of Exposure", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_tstamp_src = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_TSTAMP_SRC, .name = "Timestamp Source", .type = V4L2_CTRL_TYPE_MENU, .max = 
ARRAY_SIZE(vivid_ctrl_tstamp_src_strings) - 2, .qmenu = vivid_ctrl_tstamp_src_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_std_aspect_ratio = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_STD_ASPECT_RATIO, .name = "Standard Aspect Ratio", .type = V4L2_CTRL_TYPE_MENU, .min = 1, .max = 4, .def = 1, .qmenu = tpg_aspect_strings, }; static const char * const vivid_ctrl_dv_timings_signal_mode_strings[] = { "Current DV Timings", "No Signal", "No Lock", "Out of Range", "Selected DV Timings", "Cycle Through All DV Timings", "Custom DV Timings", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_dv_timings_signal_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS_SIGNAL_MODE, .name = "DV Timings Signal Mode", .type = V4L2_CTRL_TYPE_MENU, .max = 5, .qmenu = vivid_ctrl_dv_timings_signal_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_dv_timings_aspect_ratio = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS_ASPECT_RATIO, .name = "DV Timings Aspect Ratio", .type = V4L2_CTRL_TYPE_MENU, .max = 3, .qmenu = tpg_aspect_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_max_edid_blocks = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_MAX_EDID_BLOCKS, .name = "Maximum EDID Blocks", .type = V4L2_CTRL_TYPE_INTEGER, .min = 1, .max = 256, .def = 2, .step = 1, }; static const char * const vivid_ctrl_colorspace_strings[] = { "SMPTE 170M", "Rec. 709", "sRGB", "opRGB", "BT.2020", "DCI-P3", "SMPTE 240M", "470 System M", "470 System BG", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_colorspace = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_COLORSPACE, .name = "Colorspace", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_colorspace_strings) - 2, .def = 2, .qmenu = vivid_ctrl_colorspace_strings, }; static const char * const vivid_ctrl_xfer_func_strings[] = { "Default", "Rec. 709", "sRGB", "opRGB", "SMPTE 240M", "None", "DCI-P3", "SMPTE 2084", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_xfer_func = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_XFER_FUNC, .name = "Transfer Function", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_xfer_func_strings) - 2, .qmenu = vivid_ctrl_xfer_func_strings, }; static const char * const vivid_ctrl_ycbcr_enc_strings[] = { "Default", "ITU-R 601", "Rec. 
709", "xvYCC 601", "xvYCC 709", "", "BT.2020", "BT.2020 Constant Luminance", "SMPTE 240M", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_ycbcr_enc = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_YCBCR_ENC, .name = "Y'CbCr Encoding", .type = V4L2_CTRL_TYPE_MENU, .menu_skip_mask = 1 << 5, .max = ARRAY_SIZE(vivid_ctrl_ycbcr_enc_strings) - 2, .qmenu = vivid_ctrl_ycbcr_enc_strings, }; static const char * const vivid_ctrl_hsv_enc_strings[] = { "Hue 0-179", "Hue 0-256", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_hsv_enc = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HSV_ENC, .name = "HSV Encoding", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_hsv_enc_strings) - 2, .qmenu = vivid_ctrl_hsv_enc_strings, }; static const char * const vivid_ctrl_quantization_strings[] = { "Default", "Full Range", "Limited Range", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_quantization = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_QUANTIZATION, .name = "Quantization", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_quantization_strings) - 2, .qmenu = vivid_ctrl_quantization_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_alpha_mode = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_ALPHA_MODE, .name = "Apply Alpha To Red Only", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_limited_rgb_range = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_LIMITED_RGB_RANGE, .name = "Limited RGB Range (16-235)", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* VBI Capture Control */ static int vivid_vbi_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vbi_cap); switch (ctrl->id) { case VIVID_CID_VBI_CAP_INTERLACED: dev->vbi_cap_interlaced = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_vbi_cap_ctrl_ops = { .s_ctrl = vivid_vbi_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_vbi_cap_interlaced = { .ops = &vivid_vbi_cap_ctrl_ops, .id = VIVID_CID_VBI_CAP_INTERLACED, .name = "Interlaced VBI Format", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* Video Output Controls */ static int vivid_vid_out_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_vid_out); struct v4l2_bt_timings *bt = &dev->dv_timings_out.bt; switch (ctrl->id) { case VIVID_CID_HAS_CROP_OUT: dev->has_crop_out = ctrl->val; vivid_update_format_out(dev); break; case VIVID_CID_HAS_COMPOSE_OUT: dev->has_compose_out = ctrl->val; vivid_update_format_out(dev); break; case VIVID_CID_HAS_SCALER_OUT: dev->has_scaler_out = ctrl->val; vivid_update_format_out(dev); break; case V4L2_CID_DV_TX_MODE: dev->dvi_d_out = ctrl->val == V4L2_DV_TX_MODE_DVI_D; if (!vivid_is_hdmi_out(dev)) break; if (!dev->dvi_d_out && (bt->flags & V4L2_DV_FL_IS_CE_VIDEO)) { if (bt->width == 720 && bt->height <= 576) dev->colorspace_out = V4L2_COLORSPACE_SMPTE170M; else dev->colorspace_out = V4L2_COLORSPACE_REC709; dev->quantization_out = V4L2_QUANTIZATION_DEFAULT; } else { dev->colorspace_out = V4L2_COLORSPACE_SRGB; dev->quantization_out = dev->dvi_d_out ? 
V4L2_QUANTIZATION_LIM_RANGE : V4L2_QUANTIZATION_DEFAULT; } if (vivid_output_is_connected_to(dev)) { struct vivid_dev *dev_rx = vivid_output_is_connected_to(dev); vivid_send_source_change(dev_rx, HDMI); } break; } return 0; } static const struct v4l2_ctrl_ops vivid_vid_out_ctrl_ops = { .s_ctrl = vivid_vid_out_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_has_crop_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_CROP_OUT, .name = "Enable Output Cropping", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_compose_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_COMPOSE_OUT, .name = "Enable Output Composing", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_has_scaler_out = { .ops = &vivid_vid_out_ctrl_ops, .id = VIVID_CID_HAS_SCALER_OUT, .name = "Enable Output Scaler", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; /* Streaming Controls */ static int vivid_streaming_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_streaming); switch (ctrl->id) { case VIVID_CID_DQBUF_ERROR: dev->dqbuf_error = true; break; case VIVID_CID_PERC_DROPPED: dev->perc_dropped_buffers = ctrl->val; break; case VIVID_CID_QUEUE_SETUP_ERROR: dev->queue_setup_error = true; break; case VIVID_CID_BUF_PREPARE_ERROR: dev->buf_prepare_error = true; break; case VIVID_CID_START_STR_ERROR: dev->start_streaming_error = true; break; case VIVID_CID_REQ_VALIDATE_ERROR: dev->req_validate_error = true; break; case VIVID_CID_QUEUE_ERROR: if (vb2_start_streaming_called(&dev->vb_vid_cap_q)) vb2_queue_error(&dev->vb_vid_cap_q); if (vb2_start_streaming_called(&dev->vb_vbi_cap_q)) vb2_queue_error(&dev->vb_vbi_cap_q); if (vb2_start_streaming_called(&dev->vb_vid_out_q)) vb2_queue_error(&dev->vb_vid_out_q); if (vb2_start_streaming_called(&dev->vb_vbi_out_q)) vb2_queue_error(&dev->vb_vbi_out_q); if (vb2_start_streaming_called(&dev->vb_sdr_cap_q)) vb2_queue_error(&dev->vb_sdr_cap_q); break; case VIVID_CID_SEQ_WRAP: dev->seq_wrap = ctrl->val; break; case VIVID_CID_TIME_WRAP: dev->time_wrap = ctrl->val; if (dev->time_wrap == 1) dev->time_wrap = (1ULL << 63) - NSEC_PER_SEC * 16ULL; else if (dev->time_wrap == 2) dev->time_wrap = ((1ULL << 31) - 16) * NSEC_PER_SEC; break; } return 0; } static const struct v4l2_ctrl_ops vivid_streaming_ctrl_ops = { .s_ctrl = vivid_streaming_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_dqbuf_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_DQBUF_ERROR, .name = "Inject V4L2_BUF_FLAG_ERROR", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_perc_dropped = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_PERC_DROPPED, .name = "Percentage of Dropped Buffers", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0, .max = 100, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_queue_setup_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_QUEUE_SETUP_ERROR, .name = "Inject VIDIOC_REQBUFS Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_buf_prepare_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_BUF_PREPARE_ERROR, .name = "Inject VIDIOC_QBUF Error", .type = V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_start_streaming_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_START_STR_ERROR, .name = "Inject VIDIOC_STREAMON Error", .type 
= V4L2_CTRL_TYPE_BUTTON, }; static const struct v4l2_ctrl_config vivid_ctrl_queue_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_QUEUE_ERROR, .name = "Inject Fatal Streaming Error", .type = V4L2_CTRL_TYPE_BUTTON, }; #ifdef CONFIG_MEDIA_CONTROLLER static const struct v4l2_ctrl_config vivid_ctrl_req_validate_error = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_REQ_VALIDATE_ERROR, .name = "Inject req_validate() Error", .type = V4L2_CTRL_TYPE_BUTTON, }; #endif static const struct v4l2_ctrl_config vivid_ctrl_seq_wrap = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_SEQ_WRAP, .name = "Wrap Sequence Number", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const char * const vivid_ctrl_time_wrap_strings[] = { "None", "64 Bit", "32 Bit", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_time_wrap = { .ops = &vivid_streaming_ctrl_ops, .id = VIVID_CID_TIME_WRAP, .name = "Wrap Timestamp", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_time_wrap_strings) - 2, .qmenu = vivid_ctrl_time_wrap_strings, }; /* SDTV Capture Controls */ static int vivid_sdtv_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_sdtv_cap); switch (ctrl->id) { case VIVID_CID_STD_SIGNAL_MODE: dev->std_signal_mode[dev->input] = dev->ctrl_std_signal_mode->val; if (dev->std_signal_mode[dev->input] == SELECTED_STD) dev->query_std[dev->input] = vivid_standard[dev->ctrl_standard->val]; v4l2_ctrl_activate(dev->ctrl_standard, dev->std_signal_mode[dev->input] == SELECTED_STD); vivid_update_quality(dev); vivid_send_source_change(dev, TV); vivid_send_source_change(dev, SVID); break; } return 0; } static const struct v4l2_ctrl_ops vivid_sdtv_cap_ctrl_ops = { .s_ctrl = vivid_sdtv_cap_s_ctrl, }; static const char * const vivid_ctrl_std_signal_mode_strings[] = { "Current Standard", "No Signal", "No Lock", "", "Selected Standard", "Cycle Through All Standards", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_std_signal_mode = { .ops = &vivid_sdtv_cap_ctrl_ops, .id = VIVID_CID_STD_SIGNAL_MODE, .name = "Standard Signal Mode", .type = V4L2_CTRL_TYPE_MENU, .max = ARRAY_SIZE(vivid_ctrl_std_signal_mode_strings) - 2, .menu_skip_mask = 1 << 3, .qmenu = vivid_ctrl_std_signal_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_standard = { .ops = &vivid_sdtv_cap_ctrl_ops, .id = VIVID_CID_STANDARD, .name = "Standard", .type = V4L2_CTRL_TYPE_MENU, .max = 14, .qmenu = vivid_ctrl_standard_strings, }; /* Radio Receiver Controls */ static int vivid_radio_rx_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_radio_rx); switch (ctrl->id) { case VIVID_CID_RADIO_SEEK_MODE: dev->radio_rx_hw_seek_mode = ctrl->val; break; case VIVID_CID_RADIO_SEEK_PROG_LIM: dev->radio_rx_hw_seek_prog_lim = ctrl->val; break; case VIVID_CID_RADIO_RX_RDS_RBDS: dev->rds_gen.use_rbds = ctrl->val; break; case VIVID_CID_RADIO_RX_RDS_BLOCKIO: dev->radio_rx_rds_controls = ctrl->val; dev->radio_rx_caps &= ~V4L2_CAP_READWRITE; dev->radio_rx_rds_use_alternates = false; if (!dev->radio_rx_rds_controls) { dev->radio_rx_caps |= V4L2_CAP_READWRITE; __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, 0); __v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, 0); __v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, ""); __v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext, ""); } v4l2_ctrl_activate(dev->radio_rx_rds_pty, 
dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_psname, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_radiotext, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_ta, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_tp, dev->radio_rx_rds_controls); v4l2_ctrl_activate(dev->radio_rx_rds_ms, dev->radio_rx_rds_controls); dev->radio_rx_dev.device_caps = dev->radio_rx_caps; break; case V4L2_CID_RDS_RECEPTION: dev->radio_rx_rds_enabled = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_radio_rx_ctrl_ops = { .s_ctrl = vivid_radio_rx_s_ctrl, }; static const char * const vivid_ctrl_radio_rds_mode_strings[] = { "Block I/O", "Controls", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_rx_rds_blockio = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_RX_RDS_BLOCKIO, .name = "RDS Rx I/O Mode", .type = V4L2_CTRL_TYPE_MENU, .qmenu = vivid_ctrl_radio_rds_mode_strings, .max = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_rx_rds_rbds = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_RX_RDS_RBDS, .name = "Generate RBDS Instead of RDS", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; static const char * const vivid_ctrl_radio_hw_seek_mode_strings[] = { "Bounded", "Wrap Around", "Both", NULL, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_hw_seek_mode = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_SEEK_MODE, .name = "Radio HW Seek Mode", .type = V4L2_CTRL_TYPE_MENU, .max = 2, .qmenu = vivid_ctrl_radio_hw_seek_mode_strings, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_hw_seek_prog_lim = { .ops = &vivid_radio_rx_ctrl_ops, .id = VIVID_CID_RADIO_SEEK_PROG_LIM, .name = "Radio Programmable HW Seek", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .step = 1, }; /* Radio Transmitter Controls */ static int vivid_radio_tx_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_radio_tx); switch (ctrl->id) { case VIVID_CID_RADIO_TX_RDS_BLOCKIO: dev->radio_tx_rds_controls = ctrl->val; dev->radio_tx_caps &= ~V4L2_CAP_READWRITE; if (!dev->radio_tx_rds_controls) dev->radio_tx_caps |= V4L2_CAP_READWRITE; dev->radio_tx_dev.device_caps = dev->radio_tx_caps; break; case V4L2_CID_RDS_TX_PTY: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, ctrl->val); break; case V4L2_CID_RDS_TX_PS_NAME: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, ctrl->p_new.p_char); break; case V4L2_CID_RDS_TX_RADIO_TEXT: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext, ctrl->p_new.p_char); break; case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, ctrl->val); break; case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, ctrl->val); break; case V4L2_CID_RDS_TX_MUSIC_SPEECH: if (dev->radio_rx_rds_controls) v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, ctrl->val); break; } return 0; } static const struct v4l2_ctrl_ops vivid_radio_tx_ctrl_ops = { .s_ctrl = vivid_radio_tx_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_radio_tx_rds_blockio = { .ops = &vivid_radio_tx_ctrl_ops, .id = VIVID_CID_RADIO_TX_RDS_BLOCKIO, .name = "RDS Tx I/O Mode", .type = V4L2_CTRL_TYPE_MENU, .qmenu = vivid_ctrl_radio_rds_mode_strings, .max = 1, .def = 1, }; /* SDR Capture Controls */ static int vivid_sdr_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct 
vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_sdr_cap); switch (ctrl->id) { case VIVID_CID_SDR_CAP_FM_DEVIATION: dev->sdr_fm_deviation = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_sdr_cap_ctrl_ops = { .s_ctrl = vivid_sdr_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_sdr_cap_fm_deviation = { .ops = &vivid_sdr_cap_ctrl_ops, .id = VIVID_CID_SDR_CAP_FM_DEVIATION, .name = "FM Deviation", .type = V4L2_CTRL_TYPE_INTEGER, .min = 100, .max = 200000, .def = 75000, .step = 1, }; /* Metadata Capture Control */ static int vivid_meta_cap_s_ctrl(struct v4l2_ctrl *ctrl) { struct vivid_dev *dev = container_of(ctrl->handler, struct vivid_dev, ctrl_hdl_meta_cap); switch (ctrl->id) { case VIVID_CID_META_CAP_GENERATE_PTS: dev->meta_pts = ctrl->val; break; case VIVID_CID_META_CAP_GENERATE_SCR: dev->meta_scr = ctrl->val; break; } return 0; } static const struct v4l2_ctrl_ops vivid_meta_cap_ctrl_ops = { .s_ctrl = vivid_meta_cap_s_ctrl, }; static const struct v4l2_ctrl_config vivid_ctrl_meta_has_pts = { .ops = &vivid_meta_cap_ctrl_ops, .id = VIVID_CID_META_CAP_GENERATE_PTS, .name = "Generate PTS", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_meta_has_src_clk = { .ops = &vivid_meta_cap_ctrl_ops, .id = VIVID_CID_META_CAP_GENERATE_SCR, .name = "Generate SCR", .type = V4L2_CTRL_TYPE_BOOLEAN, .max = 1, .def = 1, .step = 1, }; static const struct v4l2_ctrl_config vivid_ctrl_class = { .ops = &vivid_user_gen_ctrl_ops, .flags = V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY, .id = VIVID_CID_VIVID_CLASS, .name = "Vivid Controls", .type = V4L2_CTRL_TYPE_CTRL_CLASS, }; int vivid_create_controls(struct vivid_dev *dev, bool show_ccs_cap, bool show_ccs_out, bool no_error_inj, bool has_sdtv, bool has_hdmi) { struct v4l2_ctrl_handler *hdl_user_gen = &dev->ctrl_hdl_user_gen; struct v4l2_ctrl_handler *hdl_user_vid = &dev->ctrl_hdl_user_vid; struct v4l2_ctrl_handler *hdl_user_aud = &dev->ctrl_hdl_user_aud; struct v4l2_ctrl_handler *hdl_streaming = &dev->ctrl_hdl_streaming; struct v4l2_ctrl_handler *hdl_sdtv_cap = &dev->ctrl_hdl_sdtv_cap; struct v4l2_ctrl_handler *hdl_loop_cap = &dev->ctrl_hdl_loop_cap; struct v4l2_ctrl_handler *hdl_fb = &dev->ctrl_hdl_fb; struct v4l2_ctrl_handler *hdl_vid_cap = &dev->ctrl_hdl_vid_cap; struct v4l2_ctrl_handler *hdl_vid_out = &dev->ctrl_hdl_vid_out; struct v4l2_ctrl_handler *hdl_vbi_cap = &dev->ctrl_hdl_vbi_cap; struct v4l2_ctrl_handler *hdl_vbi_out = &dev->ctrl_hdl_vbi_out; struct v4l2_ctrl_handler *hdl_radio_rx = &dev->ctrl_hdl_radio_rx; struct v4l2_ctrl_handler *hdl_radio_tx = &dev->ctrl_hdl_radio_tx; struct v4l2_ctrl_handler *hdl_sdr_cap = &dev->ctrl_hdl_sdr_cap; struct v4l2_ctrl_handler *hdl_meta_cap = &dev->ctrl_hdl_meta_cap; struct v4l2_ctrl_handler *hdl_meta_out = &dev->ctrl_hdl_meta_out; struct v4l2_ctrl_handler *hdl_tch_cap = &dev->ctrl_hdl_touch_cap; struct v4l2_ctrl_config vivid_ctrl_dv_timings = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_DV_TIMINGS, .name = "DV Timings", .type = V4L2_CTRL_TYPE_MENU, }; int i; v4l2_ctrl_handler_init(hdl_user_gen, 10); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_user_vid, 9); v4l2_ctrl_new_custom(hdl_user_vid, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_user_aud, 2); v4l2_ctrl_new_custom(hdl_user_aud, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_streaming, 8); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_class, NULL); 
v4l2_ctrl_handler_init(hdl_sdtv_cap, 2); v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_loop_cap, 1); v4l2_ctrl_new_custom(hdl_loop_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_fb, 1); v4l2_ctrl_new_custom(hdl_fb, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vid_cap, 55); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vid_out, 26); if (!no_error_inj || dev->has_fb || dev->num_hdmi_outputs) v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vbi_cap, 21); v4l2_ctrl_new_custom(hdl_vbi_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_vbi_out, 19); if (!no_error_inj) v4l2_ctrl_new_custom(hdl_vbi_out, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_radio_rx, 17); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_radio_tx, 17); v4l2_ctrl_new_custom(hdl_radio_tx, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_sdr_cap, 19); v4l2_ctrl_new_custom(hdl_sdr_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_meta_cap, 2); v4l2_ctrl_new_custom(hdl_meta_cap, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_meta_out, 2); v4l2_ctrl_new_custom(hdl_meta_out, &vivid_ctrl_class, NULL); v4l2_ctrl_handler_init(hdl_tch_cap, 2); v4l2_ctrl_new_custom(hdl_tch_cap, &vivid_ctrl_class, NULL); /* User Controls */ dev->volume = v4l2_ctrl_new_std(hdl_user_aud, NULL, V4L2_CID_AUDIO_VOLUME, 0, 255, 1, 200); dev->mute = v4l2_ctrl_new_std(hdl_user_aud, NULL, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); if (dev->has_vid_cap) { dev->brightness = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); for (i = 0; i < MAX_INPUTS; i++) dev->input_brightness[i] = 128; dev->contrast = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 128); dev->saturation = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 128); dev->hue = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_HUE, -128, 128, 1, 0); v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); dev->autogain = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); dev->gain = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_GAIN, 0, 255, 1, 100); dev->alpha = v4l2_ctrl_new_std(hdl_user_vid, &vivid_user_vid_ctrl_ops, V4L2_CID_ALPHA_COMPONENT, 0, 255, 1, 0); } dev->button = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_button, NULL); dev->int32 = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int32, NULL); dev->int64 = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int64, NULL); dev->boolean = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_boolean, NULL); dev->menu = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_menu, NULL); dev->string = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_string, NULL); dev->bitmask = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_bitmask, NULL); dev->int_menu = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_int_menu, NULL); dev->ro_int32 = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_ro_int32, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_area, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u32_array, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u32_dyn_array, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u16_matrix, NULL); 
v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u8_4d_array, NULL); dev->pixel_array = v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_u8_pixel_array, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_s32_array, NULL); v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_s64_array, NULL); if (dev->has_vid_cap) { /* Image Processing Controls */ struct v4l2_ctrl_config vivid_ctrl_test_pattern = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_TEST_PATTERN, .name = "Test Pattern", .type = V4L2_CTRL_TYPE_MENU, .max = TPG_PAT_NOISE, .qmenu = tpg_pattern_strings, }; dev->test_pattern = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_test_pattern, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_perc_fill, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hor_movement, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_vert_movement, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_osd_mode, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_show_border, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_show_square, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hflip, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_vflip, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_insert_sav, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_insert_eav, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_insert_hdmi_video_guard_band, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_reduced_fps, NULL); WARN_ON(dev->num_hdmi_inputs > MAX_HDMI_INPUTS); WARN_ON(dev->num_svid_inputs > MAX_SVID_INPUTS); for (u8 i = 0; i < dev->num_hdmi_inputs; i++) { snprintf(dev->ctrl_hdmi_to_output_names[i], sizeof(dev->ctrl_hdmi_to_output_names[i]), "HDMI %03u-%u Is Connected To", dev->inst, i); } for (u8 i = 0; i < dev->num_hdmi_inputs; i++) { struct v4l2_ctrl_config ctrl_config = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_HDMI_IS_CONNECTED_TO_OUTPUT(i), .name = dev->ctrl_hdmi_to_output_names[i], .type = V4L2_CTRL_TYPE_MENU, .max = 1, .qmenu = (const char * const *)vivid_ctrl_hdmi_to_output_strings, }; dev->ctrl_hdmi_to_output[i] = v4l2_ctrl_new_custom(hdl_vid_cap, &ctrl_config, NULL); } for (u8 i = 0; i < dev->num_svid_inputs; i++) { snprintf(dev->ctrl_svid_to_output_names[i], sizeof(dev->ctrl_svid_to_output_names[i]), "S-Video %03u-%u Is Connected To", dev->inst, i); } for (u8 i = 0; i < dev->num_svid_inputs; i++) { struct v4l2_ctrl_config ctrl_config = { .ops = &vivid_vid_cap_ctrl_ops, .id = VIVID_CID_SVID_IS_CONNECTED_TO_OUTPUT(i), .name = dev->ctrl_svid_to_output_names[i], .type = V4L2_CTRL_TYPE_MENU, .max = 1, .qmenu = (const char * const *)vivid_ctrl_svid_to_output_strings, }; dev->ctrl_svid_to_output[i] = v4l2_ctrl_new_custom(hdl_vid_cap, &ctrl_config, NULL); } if (show_ccs_cap) { dev->ctrl_has_crop_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_crop_cap, NULL); dev->ctrl_has_compose_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_compose_cap, NULL); dev->ctrl_has_scaler_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_has_scaler_cap, NULL); } v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_tstamp_src, NULL); dev->colorspace = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_colorspace, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_xfer_func, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_ycbcr_enc, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_hsv_enc, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_quantization, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_alpha_mode, NULL); } if (dev->has_vid_out && show_ccs_out) { 
dev->ctrl_has_crop_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_crop_out, NULL); dev->ctrl_has_compose_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_compose_out, NULL); dev->ctrl_has_scaler_out = v4l2_ctrl_new_custom(hdl_vid_out, &vivid_ctrl_has_scaler_out, NULL); } /* * Testing this driver with v4l2-compliance will trigger the error * injection controls, and after that nothing will work as expected. * So we have a module option to drop these error-injecting controls, * allowing us to run v4l2-compliance again. */ if (!no_error_inj) { v4l2_ctrl_new_custom(hdl_user_gen, &vivid_ctrl_disconnect, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_dqbuf_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_perc_dropped, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_queue_setup_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_buf_prepare_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_start_streaming_error, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_queue_error, NULL); #ifdef CONFIG_MEDIA_CONTROLLER v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_req_validate_error, NULL); #endif v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_seq_wrap, NULL); v4l2_ctrl_new_custom(hdl_streaming, &vivid_ctrl_time_wrap, NULL); } if (has_sdtv && (dev->has_vid_cap || dev->has_vbi_cap)) { if (dev->has_vid_cap) v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_std_aspect_ratio, NULL); dev->ctrl_std_signal_mode = v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_std_signal_mode, NULL); dev->ctrl_standard = v4l2_ctrl_new_custom(hdl_sdtv_cap, &vivid_ctrl_standard, NULL); if (dev->ctrl_std_signal_mode) v4l2_ctrl_cluster(2, &dev->ctrl_std_signal_mode); if (dev->has_raw_vbi_cap) v4l2_ctrl_new_custom(hdl_vbi_cap, &vivid_ctrl_vbi_cap_interlaced, NULL); } if (dev->num_hdmi_inputs) { s64 hdmi_input_mask = GENMASK(dev->num_hdmi_inputs - 1, 0); dev->ctrl_dv_timings_signal_mode = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings_signal_mode, NULL); vivid_ctrl_dv_timings.max = dev->query_dv_timings_size - 1; vivid_ctrl_dv_timings.qmenu = (const char * const *)dev->query_dv_timings_qmenu; dev->ctrl_dv_timings = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings, NULL); if (dev->ctrl_dv_timings_signal_mode) v4l2_ctrl_cluster(2, &dev->ctrl_dv_timings_signal_mode); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_dv_timings_aspect_ratio, NULL); v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_max_edid_blocks, NULL); dev->real_rgb_range_cap = v4l2_ctrl_new_custom(hdl_vid_cap, &vivid_ctrl_limited_rgb_range, NULL); dev->rgb_range_cap = v4l2_ctrl_new_std_menu(hdl_vid_cap, &vivid_vid_cap_ctrl_ops, V4L2_CID_DV_RX_RGB_RANGE, V4L2_DV_RGB_RANGE_FULL, 0, V4L2_DV_RGB_RANGE_AUTO); dev->ctrl_rx_power_present = v4l2_ctrl_new_std(hdl_vid_cap, NULL, V4L2_CID_DV_RX_POWER_PRESENT, 0, hdmi_input_mask, 0, hdmi_input_mask); } if (dev->num_hdmi_outputs) { s64 hdmi_output_mask = GENMASK(dev->num_hdmi_outputs - 1, 0); /* * We aren't doing anything with these controls at the moment, but * HDMI outputs typically have them.
*/ dev->ctrl_tx_rgb_range = v4l2_ctrl_new_std_menu(hdl_vid_out, NULL, V4L2_CID_DV_TX_RGB_RANGE, V4L2_DV_RGB_RANGE_FULL, 0, V4L2_DV_RGB_RANGE_AUTO); dev->ctrl_tx_mode = v4l2_ctrl_new_std_menu(hdl_vid_out, NULL, V4L2_CID_DV_TX_MODE, V4L2_DV_TX_MODE_HDMI, 0, V4L2_DV_TX_MODE_HDMI); dev->ctrl_tx_hotplug = v4l2_ctrl_new_std(hdl_vid_out, NULL, V4L2_CID_DV_TX_HOTPLUG, 0, hdmi_output_mask, 0, 0); dev->ctrl_tx_rxsense = v4l2_ctrl_new_std(hdl_vid_out, NULL, V4L2_CID_DV_TX_RXSENSE, 0, hdmi_output_mask, 0, 0); dev->ctrl_tx_edid_present = v4l2_ctrl_new_std(hdl_vid_out, NULL, V4L2_CID_DV_TX_EDID_PRESENT, 0, hdmi_output_mask, 0, 0); } if (dev->has_fb) v4l2_ctrl_new_custom(hdl_fb, &vivid_ctrl_clear_fb, NULL); if (dev->has_radio_rx) { v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_hw_seek_mode, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_hw_seek_prog_lim, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_rx_rds_blockio, NULL); v4l2_ctrl_new_custom(hdl_radio_rx, &vivid_ctrl_radio_rx_rds_rbds, NULL); v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RECEPTION, 0, 1, 1, 1); dev->radio_rx_rds_pty = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_PTY, 0, 31, 1, 0); dev->radio_rx_rds_psname = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_PS_NAME, 0, 8, 8, 0); dev->radio_rx_rds_radiotext = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_RADIO_TEXT, 0, 64, 64, 0); dev->radio_rx_rds_ta = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT, 0, 1, 1, 0); dev->radio_rx_rds_tp = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_TRAFFIC_PROGRAM, 0, 1, 1, 0); dev->radio_rx_rds_ms = v4l2_ctrl_new_std(hdl_radio_rx, &vivid_radio_rx_ctrl_ops, V4L2_CID_RDS_RX_MUSIC_SPEECH, 0, 1, 1, 1); } if (dev->has_radio_tx) { v4l2_ctrl_new_custom(hdl_radio_tx, &vivid_ctrl_radio_tx_rds_blockio, NULL); dev->radio_tx_rds_pi = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PI, 0, 0xffff, 1, 0x8088); dev->radio_tx_rds_pty = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PTY, 0, 31, 1, 3); dev->radio_tx_rds_psname = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_PS_NAME, 0, 8, 8, 0); if (dev->radio_tx_rds_psname) v4l2_ctrl_s_ctrl_string(dev->radio_tx_rds_psname, "VIVID-TX"); dev->radio_tx_rds_radiotext = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_RADIO_TEXT, 0, 64 * 2, 64, 0); if (dev->radio_tx_rds_radiotext) v4l2_ctrl_s_ctrl_string(dev->radio_tx_rds_radiotext, "This is a VIVID default Radio Text template text, change at will"); dev->radio_tx_rds_mono_stereo = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_MONO_STEREO, 0, 1, 1, 1); dev->radio_tx_rds_art_head = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_ARTIFICIAL_HEAD, 0, 1, 1, 0); dev->radio_tx_rds_compressed = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_COMPRESSED, 0, 1, 1, 0); dev->radio_tx_rds_dyn_pty = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_DYNAMIC_PTY, 0, 1, 1, 0); dev->radio_tx_rds_ta = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT, 0, 1, 1, 0); dev->radio_tx_rds_tp = v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_TRAFFIC_PROGRAM, 0, 1, 1, 1); dev->radio_tx_rds_ms = 
v4l2_ctrl_new_std(hdl_radio_tx, &vivid_radio_tx_ctrl_ops, V4L2_CID_RDS_TX_MUSIC_SPEECH, 0, 1, 1, 1); } if (dev->has_sdr_cap) { v4l2_ctrl_new_custom(hdl_sdr_cap, &vivid_ctrl_sdr_cap_fm_deviation, NULL); } if (dev->has_meta_cap) { v4l2_ctrl_new_custom(hdl_meta_cap, &vivid_ctrl_meta_has_pts, NULL); v4l2_ctrl_new_custom(hdl_meta_cap, &vivid_ctrl_meta_has_src_clk, NULL); } if (hdl_user_gen->error) return hdl_user_gen->error; if (hdl_user_vid->error) return hdl_user_vid->error; if (hdl_user_aud->error) return hdl_user_aud->error; if (hdl_streaming->error) return hdl_streaming->error; if (hdl_sdr_cap->error) return hdl_sdr_cap->error; if (hdl_loop_cap->error) return hdl_loop_cap->error; if (dev->autogain) v4l2_ctrl_auto_cluster(2, &dev->autogain, 0, true); if (dev->has_vid_cap) { v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_vid, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_user_aud, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_streaming, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_sdtv_cap, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_loop_cap, NULL, false); v4l2_ctrl_add_handler(hdl_vid_cap, hdl_fb, NULL, false); if (hdl_vid_cap->error) return hdl_vid_cap->error; dev->vid_cap_dev.ctrl_handler = hdl_vid_cap; } if (dev->has_vid_out) { v4l2_ctrl_add_handler(hdl_vid_out, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_vid_out, hdl_user_aud, NULL, false); v4l2_ctrl_add_handler(hdl_vid_out, hdl_streaming, NULL, false); v4l2_ctrl_add_handler(hdl_vid_out, hdl_fb, NULL, false); if (hdl_vid_out->error) return hdl_vid_out->error; dev->vid_out_dev.ctrl_handler = hdl_vid_out; } if (dev->has_vbi_cap) { v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_streaming, NULL, false); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_sdtv_cap, NULL, false); v4l2_ctrl_add_handler(hdl_vbi_cap, hdl_loop_cap, NULL, false); if (hdl_vbi_cap->error) return hdl_vbi_cap->error; dev->vbi_cap_dev.ctrl_handler = hdl_vbi_cap; } if (dev->has_vbi_out) { v4l2_ctrl_add_handler(hdl_vbi_out, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_vbi_out, hdl_streaming, NULL, false); if (hdl_vbi_out->error) return hdl_vbi_out->error; dev->vbi_out_dev.ctrl_handler = hdl_vbi_out; } if (dev->has_radio_rx) { v4l2_ctrl_add_handler(hdl_radio_rx, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_radio_rx, hdl_user_aud, NULL, false); if (hdl_radio_rx->error) return hdl_radio_rx->error; dev->radio_rx_dev.ctrl_handler = hdl_radio_rx; } if (dev->has_radio_tx) { v4l2_ctrl_add_handler(hdl_radio_tx, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_radio_tx, hdl_user_aud, NULL, false); if (hdl_radio_tx->error) return hdl_radio_tx->error; dev->radio_tx_dev.ctrl_handler = hdl_radio_tx; } if (dev->has_sdr_cap) { v4l2_ctrl_add_handler(hdl_sdr_cap, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_sdr_cap, hdl_streaming, NULL, false); if (hdl_sdr_cap->error) return hdl_sdr_cap->error; dev->sdr_cap_dev.ctrl_handler = hdl_sdr_cap; } if (dev->has_meta_cap) { v4l2_ctrl_add_handler(hdl_meta_cap, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_meta_cap, hdl_streaming, NULL, false); if (hdl_meta_cap->error) return hdl_meta_cap->error; dev->meta_cap_dev.ctrl_handler = hdl_meta_cap; } if (dev->has_meta_out) { v4l2_ctrl_add_handler(hdl_meta_out, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_meta_out, hdl_streaming, NULL, false); if (hdl_meta_out->error) return hdl_meta_out->error; 
dev->meta_out_dev.ctrl_handler = hdl_meta_out; } if (dev->has_touch_cap) { v4l2_ctrl_add_handler(hdl_tch_cap, hdl_user_gen, NULL, false); v4l2_ctrl_add_handler(hdl_tch_cap, hdl_streaming, NULL, false); if (hdl_tch_cap->error) return hdl_tch_cap->error; dev->touch_cap_dev.ctrl_handler = hdl_tch_cap; } return 0; } void vivid_free_controls(struct vivid_dev *dev) { v4l2_ctrl_handler_free(&dev->ctrl_hdl_vid_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vid_out); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vbi_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_vbi_out); v4l2_ctrl_handler_free(&dev->ctrl_hdl_radio_rx); v4l2_ctrl_handler_free(&dev->ctrl_hdl_radio_tx); v4l2_ctrl_handler_free(&dev->ctrl_hdl_sdr_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_gen); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_vid); v4l2_ctrl_handler_free(&dev->ctrl_hdl_user_aud); v4l2_ctrl_handler_free(&dev->ctrl_hdl_streaming); v4l2_ctrl_handler_free(&dev->ctrl_hdl_sdtv_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_loop_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_fb); v4l2_ctrl_handler_free(&dev->ctrl_hdl_meta_cap); v4l2_ctrl_handler_free(&dev->ctrl_hdl_meta_out); v4l2_ctrl_handler_free(&dev->ctrl_hdl_touch_cap); } |
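/*
 * A minimal sketch of how one more vivid custom control could be wired up,
 * following the same pattern used throughout this file. The control ID
 * VIVID_CID_EXAMPLE and the dev->example field are hypothetical and do not
 * exist in this driver; the struct v4l2_ctrl_config layout and the
 * v4l2_ctrl_new_custom() call mirror existing controls such as
 * vivid_ctrl_show_border above:
 *
 *	static const struct v4l2_ctrl_config vivid_ctrl_example = {
 *		.ops	= &vivid_user_gen_ctrl_ops,
 *		.id	= VIVID_CID_EXAMPLE,	// hypothetical CID
 *		.name	= "Example Boolean",
 *		.type	= V4L2_CTRL_TYPE_BOOLEAN,
 *		.max	= 1,
 *		.step	= 1,
 *	};
 *
 *	// in vivid_create_controls(), after v4l2_ctrl_handler_init():
 *	dev->example = v4l2_ctrl_new_custom(hdl_user_gen,
 *					    &vivid_ctrl_example, NULL);
 *
 * The new control ID would then be handled in the s_ctrl hook behind
 * vivid_user_gen_ctrl_ops, and the control itself is released together with
 * its handler by v4l2_ctrl_handler_free() in vivid_free_controls().
 */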
// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/media/radio/si470x/radio-si470x-common.c * * Driver for radios with Silicon Labs Si470x FM Radio Receivers * * Copyright (c) 2009 Tobias Lorenz <tobias.lorenz@gmx.net> * Copyright (c) 2012 Hans de Goede <hdegoede@redhat.com> */ /* * History: * 2008-01-12 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.0 * - First working version * 2008-01-13 Tobias Lorenz <tobias.lorenz@gmx.net> * Version
1.0.1 * - Improved error handling, every function now returns errno * - Improved multi user access (start/mute/stop) * - Channel doesn't get lost anymore after start/mute/stop * - RDS support added (polling mode via interrupt EP 1) * - marked default module parameters with *value* * - switched from bit structs to bit masks * - header file cleaned and integrated * 2008-01-14 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.2 * - hex values are now lower case * - commented USB ID for ADS/Tech moved on todo list * - blacklisted si470x in hid-quirks.c * - rds buffer handling functions integrated into *_work, *_read * - rds_command in si470x_poll exchanged against simple retval * - check for firmware version 15 * - code order and prototypes still remain the same * - spacing and bottom of band codes remain the same * 2008-01-16 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.3 * - code reordered to avoid function prototypes * - switch/case defaults are now more user-friendly * - unified comment style * - applied all checkpatch.pl v1.12 suggestions * except the warning about the too long lines with bit comments * - renamed FMRADIO to RADIO to cut line length (checkpatch.pl) * 2008-01-22 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.4 * - avoid poss. locking when doing copy_to_user which may sleep * - RDS is automatically activated on read now * - code cleaned of unnecessary rds_commands * - USB Vendor/Product ID for ADS/Tech FM Radio Receiver verified * (thanks to Guillaume RAMOUSSE) * 2008-01-27 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.5 * - number of seek_retries changed to tune_timeout * - fixed problem with incomplete tune operations by own buffers * - optimization of variables and printf types * - improved error logging * 2008-01-31 Tobias Lorenz <tobias.lorenz@gmx.net> * Oliver Neukum <oliver@neukum.org> * Version 1.0.6 * - fixed coverity checker warnings in *_usb_driver_disconnect * - probe()/open() race by correct ordering in probe() * - DMA coherency rules by separate allocation of all buffers * - use of endianness macros * - abuse of spinlock, replaced by mutex * - racy handling of timer in disconnect, * replaced by delayed_work * - racy interruptible_sleep_on(), * replaced with wait_event_interruptible() * - handle signals in read() * 2008-02-08 Tobias Lorenz <tobias.lorenz@gmx.net> * Oliver Neukum <oliver@neukum.org> * Version 1.0.7 * - usb autosuspend support * - unplugging fixed * 2008-05-07 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.8 * - hardware frequency seek support * - afc indication * - more safety checks, let si470x_get_freq return errno * - vidioc behavior corrected according to v4l2 spec * 2008-10-20 Alexey Klimov <klimov.linux@gmail.com> * - add support for KWorld USB FM Radio FM700 * - blacklisted KWorld radio in hid-core.c and hid-ids.h * 2008-12-03 Mark Lord <mlord@pobox.com> * - add support for DealExtreme USB Radio * 2009-01-31 Bob Ross <pigiron@gmx.com> * - correction of stereo detection/setting * - correction of signal strength indicator scaling * 2009-01-31 Rick Bronson <rick@efn.org> * Tobias Lorenz <tobias.lorenz@gmx.net> * - add LED status output * - get HW/SW version from scratchpad * 2009-06-16 Edouard Lafargue <edouard@lafargue.name> * Version 1.0.10 * - add support for interrupt mode for RDS endpoint, * instead of polling. 
* Improves RDS reception significantly */ /* kernel includes */ #include "radio-si470x.h" /************************************************************************** * Module Parameters **************************************************************************/ /* Spacing (kHz) */ /* 0: 200 kHz (USA, Australia) */ /* 1: 100 kHz (Europe, Japan) */ /* 2: 50 kHz */ static unsigned short space = 2; module_param(space, ushort, 0444); MODULE_PARM_DESC(space, "Spacing: 0=200kHz 1=100kHz *2=50kHz*"); /* De-emphasis */ /* 0: 75 us (USA) */ /* 1: 50 us (Europe, Australia, Japan) */ static unsigned short de = 1; module_param(de, ushort, 0444); MODULE_PARM_DESC(de, "De-emphasis: 0=75us *1=50us*"); /* Tune timeout */ static unsigned int tune_timeout = 3000; module_param(tune_timeout, uint, 0644); MODULE_PARM_DESC(tune_timeout, "Tune timeout: *3000*"); /* Seek timeout */ static unsigned int seek_timeout = 5000; module_param(seek_timeout, uint, 0644); MODULE_PARM_DESC(seek_timeout, "Seek timeout: *5000*"); static const struct v4l2_frequency_band bands[] = { { .type = V4L2_TUNER_RADIO, .index = 0, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 87500 * 16, .rangehigh = 108000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, { .type = V4L2_TUNER_RADIO, .index = 1, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 76000 * 16, .rangehigh = 108000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, { .type = V4L2_TUNER_RADIO, .index = 2, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 76000 * 16, .rangehigh = 90000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, }; /************************************************************************** * Generic Functions **************************************************************************/ /* * si470x_set_band - set the band */ static int si470x_set_band(struct si470x_device *radio, int band) { if (radio->band == band) return 0; radio->band = band; radio->registers[SYSCONFIG2] &= ~SYSCONFIG2_BAND; radio->registers[SYSCONFIG2] |= radio->band << 6; return radio->set_register(radio, SYSCONFIG2); } /* * si470x_set_chan - set the channel */ static int si470x_set_chan(struct si470x_device *radio, unsigned short chan) { int retval; unsigned long time_left; bool timed_out = false; retval = radio->get_register(radio, POWERCFG); if (retval) return retval; if ((radio->registers[POWERCFG] & (POWERCFG_ENABLE|POWERCFG_DMUTE)) != (POWERCFG_ENABLE|POWERCFG_DMUTE)) { return 0; } /* start tuning */ radio->registers[CHANNEL] &= ~CHANNEL_CHAN; radio->registers[CHANNEL] |= CHANNEL_TUNE | chan; retval = radio->set_register(radio, CHANNEL); if (retval < 0) goto done; /* wait till tune operation has completed */ reinit_completion(&radio->completion); time_left = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(tune_timeout)); if (time_left == 0) timed_out = true; if ((radio->registers[STATUSRSSI] & STATUSRSSI_STC) == 0) dev_warn(&radio->videodev.dev, "tune does not complete\n"); if (timed_out) dev_warn(&radio->videodev.dev, "tune timed out after %u ms\n", tune_timeout); /* stop tuning */ 
radio->registers[CHANNEL] &= ~CHANNEL_TUNE; retval = radio->set_register(radio, CHANNEL); done: return retval; } /* * si470x_get_step - get channel spacing */ static unsigned int si470x_get_step(struct si470x_device *radio) { /* Spacing (kHz) */ switch ((radio->registers[SYSCONFIG2] & SYSCONFIG2_SPACE) >> 4) { /* 0: 200 kHz (USA, Australia) */ case 0: return 200 * 16; /* 1: 100 kHz (Europe, Japan) */ case 1: return 100 * 16; /* 2: 50 kHz */ default: return 50 * 16; } } /* * si470x_get_freq - get the frequency */ static int si470x_get_freq(struct si470x_device *radio, unsigned int *freq) { int chan, retval; /* read channel */ retval = radio->get_register(radio, READCHAN); chan = radio->registers[READCHAN] & READCHAN_READCHAN; /* Frequency (MHz) = Spacing (kHz) x Channel + Bottom of Band (MHz) */ *freq = chan * si470x_get_step(radio) + bands[radio->band].rangelow; return retval; } /* * si470x_set_freq - set the frequency */ int si470x_set_freq(struct si470x_device *radio, unsigned int freq) { unsigned short chan; freq = clamp(freq, bands[radio->band].rangelow, bands[radio->band].rangehigh); /* Chan = [ Freq (Mhz) - Bottom of Band (MHz) ] / Spacing (kHz) */ chan = (freq - bands[radio->band].rangelow) / si470x_get_step(radio); return si470x_set_chan(radio, chan); } EXPORT_SYMBOL_GPL(si470x_set_freq); /* * si470x_set_seek - set seek */ static int si470x_set_seek(struct si470x_device *radio, const struct v4l2_hw_freq_seek *seek) { int band, retval; unsigned int freq; bool timed_out = false; unsigned long time_left; /* set band */ if (seek->rangelow || seek->rangehigh) { for (band = 0; band < ARRAY_SIZE(bands); band++) { if (bands[band].rangelow == seek->rangelow && bands[band].rangehigh == seek->rangehigh) break; } if (band == ARRAY_SIZE(bands)) return -EINVAL; /* No matching band found */ } else band = 1; /* If nothing is specified seek 76 - 108 Mhz */ if (radio->band != band) { retval = si470x_get_freq(radio, &freq); if (retval) return retval; retval = si470x_set_band(radio, band); if (retval) return retval; retval = si470x_set_freq(radio, freq); if (retval) return retval; } /* start seeking */ radio->registers[POWERCFG] |= POWERCFG_SEEK; if (seek->wrap_around) radio->registers[POWERCFG] &= ~POWERCFG_SKMODE; else radio->registers[POWERCFG] |= POWERCFG_SKMODE; if (seek->seek_upward) radio->registers[POWERCFG] |= POWERCFG_SEEKUP; else radio->registers[POWERCFG] &= ~POWERCFG_SEEKUP; retval = radio->set_register(radio, POWERCFG); if (retval < 0) return retval; /* wait till tune operation has completed */ reinit_completion(&radio->completion); time_left = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(seek_timeout)); if (time_left == 0) timed_out = true; if ((radio->registers[STATUSRSSI] & STATUSRSSI_STC) == 0) dev_warn(&radio->videodev.dev, "seek does not complete\n"); if (radio->registers[STATUSRSSI] & STATUSRSSI_SF) dev_warn(&radio->videodev.dev, "seek failed / band limit reached\n"); /* stop seeking */ radio->registers[POWERCFG] &= ~POWERCFG_SEEK; retval = radio->set_register(radio, POWERCFG); /* try again, if timed out */ if (retval == 0 && timed_out) return -ENODATA; return retval; } /* * si470x_start - switch on radio */ int si470x_start(struct si470x_device *radio) { int retval; /* powercfg */ radio->registers[POWERCFG] = POWERCFG_DMUTE | POWERCFG_ENABLE | POWERCFG_RDSM; retval = radio->set_register(radio, POWERCFG); if (retval < 0) goto done; /* sysconfig 1 */ radio->registers[SYSCONFIG1] |= SYSCONFIG1_RDSIEN | SYSCONFIG1_STCIEN | SYSCONFIG1_RDS; 
radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_GPIO2; radio->registers[SYSCONFIG1] |= SYSCONFIG1_GPIO2_INT; if (de) radio->registers[SYSCONFIG1] |= SYSCONFIG1_DE; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) goto done; /* sysconfig 2 */ radio->registers[SYSCONFIG2] = (0x1f << 8) | /* SEEKTH */ ((radio->band << 6) & SYSCONFIG2_BAND) |/* BAND */ ((space << 4) & SYSCONFIG2_SPACE) | /* SPACE */ 15; /* VOLUME (max) */ retval = radio->set_register(radio, SYSCONFIG2); if (retval < 0) goto done; /* reset last channel */ retval = si470x_set_chan(radio, radio->registers[CHANNEL] & CHANNEL_CHAN); done: return retval; } EXPORT_SYMBOL_GPL(si470x_start); /* * si470x_stop - switch off radio */ int si470x_stop(struct si470x_device *radio) { int retval; /* sysconfig 1 */ radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_RDS; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) goto done; /* powercfg */ radio->registers[POWERCFG] &= ~POWERCFG_DMUTE; /* POWERCFG_ENABLE has to automatically go low */ radio->registers[POWERCFG] |= POWERCFG_ENABLE | POWERCFG_DISABLE; retval = radio->set_register(radio, POWERCFG); done: return retval; } EXPORT_SYMBOL_GPL(si470x_stop); /* * si470x_rds_on - switch on rds reception */ static int si470x_rds_on(struct si470x_device *radio) { int retval; /* sysconfig 1 */ radio->registers[SYSCONFIG1] |= SYSCONFIG1_RDS; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_RDS; return retval; } /************************************************************************** * File Operations Interface **************************************************************************/ /* * si470x_fops_read - read RDS data */ static ssize_t si470x_fops_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct si470x_device *radio = video_drvdata(file); int retval = 0; unsigned int block_count = 0; /* switch on rds reception */ if ((radio->registers[SYSCONFIG1] & SYSCONFIG1_RDS) == 0) si470x_rds_on(radio); /* block if no new data available */ while (radio->wr_index == radio->rd_index) { if (file->f_flags & O_NONBLOCK) { retval = -EWOULDBLOCK; goto done; } if (wait_event_interruptible(radio->read_queue, radio->wr_index != radio->rd_index) < 0) { retval = -EINTR; goto done; } } /* calculate block count from byte count */ count /= 3; /* copy RDS block out of internal buffer and to user buffer */ while (block_count < count) { if (radio->rd_index == radio->wr_index) break; /* always transfer rds complete blocks */ if (copy_to_user(buf, &radio->buffer[radio->rd_index], 3)) /* retval = -EFAULT; */ break; /* increment and wrap read pointer */ radio->rd_index += 3; if (radio->rd_index >= radio->buf_size) radio->rd_index = 0; /* increment counters */ block_count++; buf += 3; retval += 3; } done: return retval; } /* * si470x_fops_poll - poll RDS data */ static __poll_t si470x_fops_poll(struct file *file, struct poll_table_struct *pts) { struct si470x_device *radio = video_drvdata(file); __poll_t req_events = poll_requested_events(pts); __poll_t retval = v4l2_ctrl_poll(file, pts); if (req_events & (EPOLLIN | EPOLLRDNORM)) { /* switch on rds reception */ if ((radio->registers[SYSCONFIG1] & SYSCONFIG1_RDS) == 0) si470x_rds_on(radio); poll_wait(file, &radio->read_queue, pts); if (radio->rd_index != radio->wr_index) retval |= EPOLLIN | EPOLLRDNORM; } return retval; } static int si470x_fops_open(struct file *file) { struct si470x_device *radio = video_drvdata(file); return radio->fops_open(file); } /* * 
si470x_fops_release - file release */ static int si470x_fops_release(struct file *file) { struct si470x_device *radio = video_drvdata(file); return radio->fops_release(file); } /* * si470x_fops - file operations interface */ static const struct v4l2_file_operations si470x_fops = { .owner = THIS_MODULE, .read = si470x_fops_read, .poll = si470x_fops_poll, .unlocked_ioctl = video_ioctl2, .open = si470x_fops_open, .release = si470x_fops_release, }; /************************************************************************** * Video4Linux Interface **************************************************************************/ static int si470x_s_ctrl(struct v4l2_ctrl *ctrl) { struct si470x_device *radio = container_of(ctrl->handler, struct si470x_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_VOLUME: radio->registers[SYSCONFIG2] &= ~SYSCONFIG2_VOLUME; radio->registers[SYSCONFIG2] |= ctrl->val; return radio->set_register(radio, SYSCONFIG2); case V4L2_CID_AUDIO_MUTE: if (ctrl->val) radio->registers[POWERCFG] &= ~POWERCFG_DMUTE; else radio->registers[POWERCFG] |= POWERCFG_DMUTE; return radio->set_register(radio, POWERCFG); default: return -EINVAL; } } /* * si470x_vidioc_g_tuner - get tuner attributes */ static int si470x_vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *tuner) { struct si470x_device *radio = video_drvdata(file); int retval = 0; if (tuner->index != 0) return -EINVAL; if (!radio->status_rssi_auto_update) { retval = radio->get_register(radio, STATUSRSSI); if (retval < 0) return retval; } /* driver constants */ strscpy(tuner->name, "FM", sizeof(tuner->name)); tuner->type = V4L2_TUNER_RADIO; tuner->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP; tuner->rangelow = 76 * FREQ_MUL; tuner->rangehigh = 108 * FREQ_MUL; /* stereo indicator == stereo (instead of mono) */ if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 0) tuner->rxsubchans = V4L2_TUNER_SUB_MONO; else tuner->rxsubchans = V4L2_TUNER_SUB_STEREO; /* If there is a reliable method of detecting an RDS channel, then this code should check for that before setting this RDS subchannel. */ tuner->rxsubchans |= V4L2_TUNER_SUB_RDS; /* mono/stereo selector */ if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 0) tuner->audmode = V4L2_TUNER_MODE_STEREO; else tuner->audmode = V4L2_TUNER_MODE_MONO; /* min is worst, max is best; signal:0..0xffff; rssi: 0..0xff */ /* measured in units of dbµV in 1 db increments (max at ~75 dbµV) */ tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI); /* the ideal factor is 0xffff/75 = 873,8 */ tuner->signal = (tuner->signal * 873) + (8 * tuner->signal / 10); if (tuner->signal > 0xffff) tuner->signal = 0xffff; /* automatic frequency control: -1: freq to low, 1 freq to high */ /* AFCRL does only indicate that freq. differs, not if too low/high */ tuner->afc = (radio->registers[STATUSRSSI] & STATUSRSSI_AFCRL) ? 
1 : 0; return retval; } /* * si470x_vidioc_s_tuner - set tuner attributes */ static int si470x_vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *tuner) { struct si470x_device *radio = video_drvdata(file); if (tuner->index != 0) return -EINVAL; /* mono/stereo selector */ switch (tuner->audmode) { case V4L2_TUNER_MODE_MONO: radio->registers[POWERCFG] |= POWERCFG_MONO; /* force mono */ break; case V4L2_TUNER_MODE_STEREO: default: radio->registers[POWERCFG] &= ~POWERCFG_MONO; /* try stereo */ break; } return radio->set_register(radio, POWERCFG); } /* * si470x_vidioc_g_frequency - get tuner or modulator radio frequency */ static int si470x_vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *freq) { struct si470x_device *radio = video_drvdata(file); if (freq->tuner != 0) return -EINVAL; freq->type = V4L2_TUNER_RADIO; return si470x_get_freq(radio, &freq->frequency); } /* * si470x_vidioc_s_frequency - set tuner or modulator radio frequency */ static int si470x_vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *freq) { struct si470x_device *radio = video_drvdata(file); int retval; if (freq->tuner != 0) return -EINVAL; if (freq->frequency < bands[radio->band].rangelow || freq->frequency > bands[radio->band].rangehigh) { /* Switch to band 1 which covers everything we support */ retval = si470x_set_band(radio, 1); if (retval) return retval; } return si470x_set_freq(radio, freq->frequency); } /* * si470x_vidioc_s_hw_freq_seek - set hardware frequency seek */ static int si470x_vidioc_s_hw_freq_seek(struct file *file, void *priv, const struct v4l2_hw_freq_seek *seek) { struct si470x_device *radio = video_drvdata(file); if (seek->tuner != 0) return -EINVAL; if (file->f_flags & O_NONBLOCK) return -EWOULDBLOCK; return si470x_set_seek(radio, seek); } /* * si470x_vidioc_enum_freq_bands - enumerate supported bands */ static int si470x_vidioc_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { if (band->tuner != 0) return -EINVAL; if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; *band = bands[band->index]; return 0; } const struct v4l2_ctrl_ops si470x_ctrl_ops = { .s_ctrl = si470x_s_ctrl, }; EXPORT_SYMBOL_GPL(si470x_ctrl_ops); static int si470x_vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *capability) { struct si470x_device *radio = video_drvdata(file); return radio->vidioc_querycap(file, priv, capability); }; /* * si470x_ioctl_ops - video device ioctl operations */ static const struct v4l2_ioctl_ops si470x_ioctl_ops = { .vidioc_querycap = si470x_vidioc_querycap, .vidioc_g_tuner = si470x_vidioc_g_tuner, .vidioc_s_tuner = si470x_vidioc_s_tuner, .vidioc_g_frequency = si470x_vidioc_g_frequency, .vidioc_s_frequency = si470x_vidioc_s_frequency, .vidioc_s_hw_freq_seek = si470x_vidioc_s_hw_freq_seek, .vidioc_enum_freq_bands = si470x_vidioc_enum_freq_bands, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* * si470x_viddev_template - video device interface */ const struct video_device si470x_viddev_template = { .fops = &si470x_fops, .name = DRIVER_NAME, .release = video_device_release_empty, .ioctl_ops = &si470x_ioctl_ops, }; EXPORT_SYMBOL_GPL(si470x_viddev_template); MODULE_DESCRIPTION("Core radio driver for Si470x FM Radio Receivers"); MODULE_LICENSE("GPL"); |
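/*
 * Editor's note: the stand-alone sketch below is not part of the driver
 * above. It only illustrates the channel/frequency arithmetic performed by
 * si470x_get_freq() and si470x_set_freq(), assuming band 0 from the bands[]
 * table (87.5 - 108 MHz) and 200 kHz spacing (space = 0), with frequencies in
 * V4L2_TUNER_CAP_LOW units of 1/16 kHz. All example_* names are hypothetical.
 */
#include <stdio.h>

#define EXAMPLE_RANGELOW	(87500u * 16)	/* band 0 bottom of band, 1/16 kHz units */
#define EXAMPLE_STEP		(200u * 16)	/* 200 kHz spacing, 1/16 kHz units */

static unsigned int example_chan_to_freq(unsigned short chan)
{
	/* Freq = Spacing * Chan + Bottom of Band, as in si470x_get_freq() */
	return chan * EXAMPLE_STEP + EXAMPLE_RANGELOW;
}

static unsigned short example_freq_to_chan(unsigned int freq)
{
	/* Chan = (Freq - Bottom of Band) / Spacing, as in si470x_set_freq() */
	return (freq - EXAMPLE_RANGELOW) / EXAMPLE_STEP;
}

int main(void)
{
	unsigned int freq = 100000u * 16;	/* 100.0 MHz in 1/16 kHz units */
	unsigned short chan = example_freq_to_chan(freq);

	/* prints the channel number and the frequency quantized to the step */
	printf("100.0 MHz -> chan %u -> %u (1/16 kHz units)\n",
	       (unsigned int)chan, example_chan_to_freq(chan));
	return 0;
}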
// SPDX-License-Identifier: GPL-2.0-only
/*
 * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6
 *
 * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
 */
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
#include <net/ipv6.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("ip6tables mangle table");

#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
			    (1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT) | \
			    (1 << NF_INET_POST_ROUTING))

static const struct xt_table packet_mangler = {
	.name		= "mangle",
	.valid_hooks	= MANGLE_VALID_HOOKS,
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV6,
	.priority	= NF_IP6_PRI_MANGLE,
};

static unsigned int
ip6t_mangle_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
	struct in6_addr saddr, daddr;
	unsigned int ret, verdict;
	u32 flowlabel, mark;
	u8 hop_limit;
	int err;

	/* save source/dest address, mark, hoplimit, flowlabel, priority */
	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
	mark = skb->mark;
	hop_limit = ipv6_hdr(skb)->hop_limit;

	/* flowlabel and prio (includes version, which shouldn't change either) */
	flowlabel = *((u_int32_t *)ipv6_hdr(skb));

	ret = ip6t_do_table(priv, skb, state);
	verdict = ret & NF_VERDICT_MASK;

	if (verdict != NF_DROP && verdict != NF_STOLEN &&
	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
	     skb->mark != mark ||
	     ipv6_hdr(skb)->hop_limit != hop_limit ||
	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
		err = ip6_route_me_harder(state->net, state->sk, skb);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}

	return ret;
}

/* The work comes in here from netfilter.c.
 */
static unsigned int
ip6table_mangle_hook(void *priv, struct sk_buff *skb,
		     const struct nf_hook_state *state)
{
	if (state->hook == NF_INET_LOCAL_OUT)
		return ip6t_mangle_out(priv, skb, state);
	return ip6t_do_table(priv, skb, state);
}

static struct nf_hook_ops *mangle_ops __read_mostly;

static int ip6table_mangle_table_init(struct net *net)
{
	struct ip6t_replace *repl;
	int ret;

	repl = ip6t_alloc_initial_table(&packet_mangler);
	if (repl == NULL)
		return -ENOMEM;
	ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops);
	kfree(repl);
	return ret;
}

static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
{
	ip6t_unregister_table_pre_exit(net, "mangle");
}

static void __net_exit ip6table_mangle_net_exit(struct net *net)
{
	ip6t_unregister_table_exit(net, "mangle");
}

static struct pernet_operations ip6table_mangle_net_ops = {
	.pre_exit = ip6table_mangle_net_pre_exit,
	.exit = ip6table_mangle_net_exit,
};

static int __init ip6table_mangle_init(void)
{
	int ret = xt_register_template(&packet_mangler,
				       ip6table_mangle_table_init);

	if (ret < 0)
		return ret;

	mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
	if (IS_ERR(mangle_ops)) {
		xt_unregister_template(&packet_mangler);
		return PTR_ERR(mangle_ops);
	}

	ret = register_pernet_subsys(&ip6table_mangle_net_ops);
	if (ret < 0) {
		xt_unregister_template(&packet_mangler);
		kfree(mangle_ops);
		return ret;
	}

	return ret;
}

static void __exit ip6table_mangle_fini(void)
{
	unregister_pernet_subsys(&ip6table_mangle_net_ops);
	xt_unregister_template(&packet_mangler);
	kfree(mangle_ops);
}

module_init(ip6table_mangle_init);
module_exit(ip6table_mangle_fini);
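/*
 * Editor's note: the sketch below is user-space illustration, not kernel
 * code. It restates, in isolation, the decision ip6t_mangle_out() makes:
 * snapshot the routing-relevant fields before running the ruleset, then
 * re-route only if a rule changed one of them and the verdict still lets
 * the packet through. All example_* names are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct example_pkt {
	uint8_t  saddr[16];
	uint8_t  daddr[16];
	uint32_t mark;
	uint8_t  hop_limit;
	uint32_t flow_word;	/* first 32 bits of the IPv6 header */
};

/* Returns true when the packet would need re-routing after the table ran. */
static bool example_needs_reroute(const struct example_pkt *before,
				  const struct example_pkt *after,
				  bool dropped_or_stolen)
{
	if (dropped_or_stolen)
		return false;	/* packet is gone, nothing to re-route */

	return memcmp(before->saddr, after->saddr, sizeof(before->saddr)) ||
	       memcmp(before->daddr, after->daddr, sizeof(before->daddr)) ||
	       before->mark != after->mark ||
	       before->hop_limit != after->hop_limit ||
	       before->flow_word != after->flow_word;
}

int main(void)
{
	struct example_pkt before = { .mark = 0, .hop_limit = 64 };
	struct example_pkt after = before;

	after.mark = 0x1;	/* pretend a rule set a new fwmark */
	return example_needs_reroute(&before, &after, false) ? 0 : 1;
}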
// SPDX-License-Identifier: GPL-2.0+
/*
 * Procedures for creating, accessing and interpreting the device tree.
 *
 * Paul Mackerras	August 1996.
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
 *   {engebret|bergner}@us.ibm.com
 *
 * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net
 *
 * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and
 * Grant Likely.
*/ #define pr_fmt(fmt) "OF: " fmt #include <linux/cleanup.h> #include <linux/console.h> #include <linux/ctype.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/proc_fs.h> #include "of_private.h" LIST_HEAD(aliases_lookup); struct device_node *of_root; EXPORT_SYMBOL(of_root); struct device_node *of_chosen; EXPORT_SYMBOL(of_chosen); struct device_node *of_aliases; struct device_node *of_stdout; static const char *of_stdout_options; struct kset *of_kset; /* * Used to protect the of_aliases, to hold off addition of nodes to sysfs. * This mutex must be held whenever modifications are being made to the * device tree. The of_{attach,detach}_node() and * of_{add,remove,update}_property() helpers make sure this happens. */ DEFINE_MUTEX(of_mutex); /* use when traversing tree through the child, sibling, * or parent members of struct device_node. */ DEFINE_RAW_SPINLOCK(devtree_lock); bool of_node_name_eq(const struct device_node *np, const char *name) { const char *node_name; size_t len; if (!np) return false; node_name = kbasename(np->full_name); len = strchrnul(node_name, '@') - node_name; return (strlen(name) == len) && (strncmp(node_name, name, len) == 0); } EXPORT_SYMBOL(of_node_name_eq); bool of_node_name_prefix(const struct device_node *np, const char *prefix) { if (!np) return false; return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0; } EXPORT_SYMBOL(of_node_name_prefix); static bool __of_node_is_type(const struct device_node *np, const char *type) { const char *match = __of_get_property(np, "device_type", NULL); return np && match && type && !strcmp(match, type); } #define EXCLUDED_DEFAULT_CELLS_PLATFORMS ( \ IS_ENABLED(CONFIG_SPARC) || \ of_find_compatible_node(NULL, NULL, "coreboot") \ ) int of_bus_n_addr_cells(struct device_node *np) { u32 cells; for (; np; np = np->parent) { if (!of_property_read_u32(np, "#address-cells", &cells)) return cells; /* * Default root value and walking parent nodes for "#address-cells" * is deprecated. Any platforms which hit this warning should * be added to the excluded list. */ WARN_ONCE(!EXCLUDED_DEFAULT_CELLS_PLATFORMS, "Missing '#address-cells' in %pOF\n", np); } return OF_ROOT_NODE_ADDR_CELLS_DEFAULT; } int of_n_addr_cells(struct device_node *np) { if (np->parent) np = np->parent; return of_bus_n_addr_cells(np); } EXPORT_SYMBOL(of_n_addr_cells); int of_bus_n_size_cells(struct device_node *np) { u32 cells; for (; np; np = np->parent) { if (!of_property_read_u32(np, "#size-cells", &cells)) return cells; /* * Default root value and walking parent nodes for "#size-cells" * is deprecated. Any platforms which hit this warning should * be added to the excluded list. */ WARN_ONCE(!EXCLUDED_DEFAULT_CELLS_PLATFORMS, "Missing '#size-cells' in %pOF\n", np); } return OF_ROOT_NODE_SIZE_CELLS_DEFAULT; } int of_n_size_cells(struct device_node *np) { if (np->parent) np = np->parent; return of_bus_n_size_cells(np); } EXPORT_SYMBOL(of_n_size_cells); #ifdef CONFIG_NUMA int __weak of_node_to_nid(struct device_node *np) { return NUMA_NO_NODE; } #endif #define OF_PHANDLE_CACHE_BITS 7 #define OF_PHANDLE_CACHE_SZ BIT(OF_PHANDLE_CACHE_BITS) static struct device_node *phandle_cache[OF_PHANDLE_CACHE_SZ]; static u32 of_phandle_cache_hash(phandle handle) { return hash_32(handle, OF_PHANDLE_CACHE_BITS); } /* * Caller must hold devtree_lock. 
*/ void __of_phandle_cache_inv_entry(phandle handle) { u32 handle_hash; struct device_node *np; if (!handle) return; handle_hash = of_phandle_cache_hash(handle); np = phandle_cache[handle_hash]; if (np && handle == np->phandle) phandle_cache[handle_hash] = NULL; } void __init of_core_init(void) { struct device_node *np; of_platform_register_reconfig_notifier(); /* Create the kset, and register existing nodes */ mutex_lock(&of_mutex); of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { mutex_unlock(&of_mutex); pr_err("failed to register existing nodes\n"); return; } for_each_of_allnodes(np) { __of_attach_node_sysfs(np); if (np->phandle && !phandle_cache[of_phandle_cache_hash(np->phandle)]) phandle_cache[of_phandle_cache_hash(np->phandle)] = np; } mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ if (of_root) proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); } static struct property *__of_find_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp; if (!np) return NULL; for (pp = np->properties; pp; pp = pp->next) { if (of_prop_cmp(pp->name, name) == 0) { if (lenp) *lenp = pp->length; break; } } return pp; } struct property *of_find_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); pp = __of_find_property(np, name, lenp); raw_spin_unlock_irqrestore(&devtree_lock, flags); return pp; } EXPORT_SYMBOL(of_find_property); struct device_node *__of_find_all_nodes(struct device_node *prev) { struct device_node *np; if (!prev) { np = of_root; } else if (prev->child) { np = prev->child; } else { /* Walk back up looking for a sibling, or the end of the structure */ np = prev; while (np->parent && !np->sibling) np = np->parent; np = np->sibling; /* Might be null at the end of the tree */ } return np; } /** * of_find_all_nodes - Get next node in global list * @prev: Previous node or NULL to start iteration * of_node_put() will be called on it * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_all_nodes(struct device_node *prev) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); np = __of_find_all_nodes(prev); of_node_get(np); of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_all_nodes); /* * Find a property with a given name for a given node * and return the value. */ const void *__of_get_property(const struct device_node *np, const char *name, int *lenp) { const struct property *pp = __of_find_property(np, name, lenp); return pp ? pp->value : NULL; } /* * Find a property with a given name for a given node * and return the value. */ const void *of_get_property(const struct device_node *np, const char *name, int *lenp) { const struct property *pp = of_find_property(np, name, lenp); return pp ? pp->value : NULL; } EXPORT_SYMBOL(of_get_property); /** * __of_device_is_compatible() - Check if the node matches given constraints * @device: pointer to node * @compat: required compatible string, NULL or "" for any match * @type: required device_type value, NULL or "" for any match * @name: required node name, NULL or "" for any match * * Checks if the given @compat, @type and @name strings match the * properties of the given @device. A constraints can be skipped by * passing NULL or an empty string as the constraint. 
* * Returns 0 for no match, and a positive integer on match. The return * value is a relative score with larger values indicating better * matches. The score is weighted for the most specific compatible value * to get the highest score. Matching type is next, followed by matching * name. Practically speaking, this results in the following priority * order for matches: * * 1. specific compatible && type && name * 2. specific compatible && type * 3. specific compatible && name * 4. specific compatible * 5. general compatible && type && name * 6. general compatible && type * 7. general compatible && name * 8. general compatible * 9. type && name * 10. type * 11. name */ static int __of_device_is_compatible(const struct device_node *device, const char *compat, const char *type, const char *name) { const struct property *prop; const char *cp; int index = 0, score = 0; /* Compatible match has highest priority */ if (compat && compat[0]) { prop = __of_find_property(device, "compatible", NULL); for (cp = of_prop_next_string(prop, NULL); cp; cp = of_prop_next_string(prop, cp), index++) { if (of_compat_cmp(cp, compat, strlen(compat)) == 0) { score = INT_MAX/2 - (index << 2); break; } } if (!score) return 0; } /* Matching type is better than matching name */ if (type && type[0]) { if (!__of_node_is_type(device, type)) return 0; score += 2; } /* Matching name is a bit better than not */ if (name && name[0]) { if (!of_node_name_eq(device, name)) return 0; score++; } return score; } /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ int of_device_is_compatible(const struct device_node *device, const char *compat) { unsigned long flags; int res; raw_spin_lock_irqsave(&devtree_lock, flags); res = __of_device_is_compatible(device, compat, NULL, NULL); raw_spin_unlock_irqrestore(&devtree_lock, flags); return res; } EXPORT_SYMBOL(of_device_is_compatible); /** Checks if the device is compatible with any of the entries in * a NULL terminated array of strings. Returns the best match * score or 0. */ int of_device_compatible_match(const struct device_node *device, const char *const *compat) { unsigned int tmp, score = 0; if (!compat) return 0; while (*compat) { tmp = of_device_is_compatible(device, *compat); if (tmp > score) score = tmp; compat++; } return score; } EXPORT_SYMBOL_GPL(of_device_compatible_match); /** * of_machine_compatible_match - Test root of device tree against a compatible array * @compats: NULL terminated array of compatible strings to look for in root node's compatible property. * * Returns true if the root node has any of the given compatible values in its * compatible property. 
*/ bool of_machine_compatible_match(const char *const *compats) { struct device_node *root; int rc = 0; root = of_find_node_by_path("/"); if (root) { rc = of_device_compatible_match(root, compats); of_node_put(root); } return rc != 0; } EXPORT_SYMBOL(of_machine_compatible_match); static bool __of_device_is_status(const struct device_node *device, const char * const*strings) { const char *status; int statlen; if (!device) return false; status = __of_get_property(device, "status", &statlen); if (status == NULL) return false; if (statlen > 0) { while (*strings) { unsigned int len = strlen(*strings); if ((*strings)[len - 1] == '-') { if (!strncmp(status, *strings, len)) return true; } else { if (!strcmp(status, *strings)) return true; } strings++; } } return false; } /** * __of_device_is_available - check if a device is available for use * * @device: Node to check for availability, with locks already held * * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ static bool __of_device_is_available(const struct device_node *device) { static const char * const ok[] = {"okay", "ok", NULL}; if (!device) return false; return !__of_get_property(device, "status", NULL) || __of_device_is_status(device, ok); } /** * __of_device_is_reserved - check if a device is reserved * * @device: Node to check for availability, with locks already held * * Return: True if the status property is set to "reserved", false otherwise */ static bool __of_device_is_reserved(const struct device_node *device) { static const char * const reserved[] = {"reserved", NULL}; return __of_device_is_status(device, reserved); } /** * of_device_is_available - check if a device is available for use * * @device: Node to check for availability * * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ bool of_device_is_available(const struct device_node *device) { unsigned long flags; bool res; raw_spin_lock_irqsave(&devtree_lock, flags); res = __of_device_is_available(device); raw_spin_unlock_irqrestore(&devtree_lock, flags); return res; } EXPORT_SYMBOL(of_device_is_available); /** * __of_device_is_fail - check if a device has status "fail" or "fail-..." * * @device: Node to check status for, with locks already held * * Return: True if the status property is set to "fail" or "fail-..." (for any * error code suffix), false otherwise */ static bool __of_device_is_fail(const struct device_node *device) { static const char * const fail[] = {"fail", "fail-", NULL}; return __of_device_is_status(device, fail); } /** * of_device_is_big_endian - check if a device has BE registers * * @device: Node to check for endianness * * Return: True if the device has a "big-endian" property, or if the kernel * was compiled for BE *and* the device has a "native-endian" property. * Returns false otherwise. * * Callers would nominally use ioread32be/iowrite32be if * of_device_is_big_endian() == true, or readl/writel otherwise. */ bool of_device_is_big_endian(const struct device_node *device) { if (of_property_read_bool(device, "big-endian")) return true; if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && of_property_read_bool(device, "native-endian")) return true; return false; } EXPORT_SYMBOL(of_device_is_big_endian); /** * of_get_parent - Get a node's parent if any * @node: Node to get parent * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ struct device_node *of_get_parent(const struct device_node *node) { struct device_node *np; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); np = of_node_get(node->parent); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_get_parent); /** * of_get_next_parent - Iterate to a node's parent * @node: Node to get parent of * * This is like of_get_parent() except that it drops the * refcount on the passed node, making it suitable for iterating * through a node's parents. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_get_next_parent(struct device_node *node) { struct device_node *parent; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); parent = of_node_get(node->parent); of_node_put(node); raw_spin_unlock_irqrestore(&devtree_lock, flags); return parent; } EXPORT_SYMBOL(of_get_next_parent); static struct device_node *__of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; if (!node) return NULL; next = prev ? prev->sibling : node->child; of_node_get(next); of_node_put(prev); return next; } #define __for_each_child_of_node(parent, child) \ for (child = __of_get_next_child(parent, NULL); child != NULL; \ child = __of_get_next_child(parent, child)) /** * of_get_next_child - Iterate a node childs * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * Return: A node pointer with refcount incremented, use of_node_put() on * it when done. Returns NULL when prev is the last child. Decrements the * refcount of prev. */ struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); next = __of_get_next_child(node, prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_child); /** * of_get_next_child_with_prefix - Find the next child node with prefix * @node: parent node * @prev: previous child of the parent node, or NULL to get first * @prefix: prefix that the node name should have * * This function is like of_get_next_child(), except that it automatically * skips any nodes whose name doesn't have the given prefix. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_get_next_child_with_prefix(const struct device_node *node, struct device_node *prev, const char *prefix) { struct device_node *next; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); next = prev ? prev->sibling : node->child; for (; next; next = next->sibling) { if (!of_node_name_prefix(next, prefix)) continue; if (of_node_get(next)) break; } of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_child_with_prefix); static struct device_node *of_get_next_status_child(const struct device_node *node, struct device_node *prev, bool (*checker)(const struct device_node *)) { struct device_node *next; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); next = prev ? 
prev->sibling : node->child; for (; next; next = next->sibling) { if (!checker(next)) continue; if (of_node_get(next)) break; } of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } /** * of_get_next_available_child - Find the next available child node * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * This function is like of_get_next_child(), except that it * automatically skips any disabled nodes (i.e. status = "disabled"). */ struct device_node *of_get_next_available_child(const struct device_node *node, struct device_node *prev) { return of_get_next_status_child(node, prev, __of_device_is_available); } EXPORT_SYMBOL(of_get_next_available_child); /** * of_get_next_reserved_child - Find the next reserved child node * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * This function is like of_get_next_child(), except that it * automatically skips any disabled nodes (i.e. status = "disabled"). */ struct device_node *of_get_next_reserved_child(const struct device_node *node, struct device_node *prev) { return of_get_next_status_child(node, prev, __of_device_is_reserved); } EXPORT_SYMBOL(of_get_next_reserved_child); /** * of_get_next_cpu_node - Iterate on cpu nodes * @prev: previous child of the /cpus node, or NULL to get first * * Unusable CPUs (those with the status property set to "fail" or "fail-...") * will be skipped. * * Return: A cpu node pointer with refcount incremented, use of_node_put() * on it when done. Returns NULL when prev is the last child. Decrements * the refcount of prev. */ struct device_node *of_get_next_cpu_node(struct device_node *prev) { struct device_node *next = NULL; unsigned long flags; struct device_node *node; if (!prev) node = of_find_node_by_path("/cpus"); raw_spin_lock_irqsave(&devtree_lock, flags); if (prev) next = prev->sibling; else if (node) { next = node->child; of_node_put(node); } for (; next; next = next->sibling) { if (__of_device_is_fail(next)) continue; if (!(of_node_name_eq(next, "cpu") || __of_node_is_type(next, "cpu"))) continue; if (of_node_get(next)) break; } of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_cpu_node); /** * of_get_compatible_child - Find compatible child node * @parent: parent node * @compatible: compatible string * * Lookup child node whose compatible property contains the given compatible * string. * * Return: a node pointer with refcount incremented, use of_node_put() on it * when done; or NULL if not found. */ struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { struct device_node *child; for_each_child_of_node(parent, child) { if (of_device_is_compatible(child, compatible)) break; } return child; } EXPORT_SYMBOL(of_get_compatible_child); /** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node * @name: child name to look for. * * This function looks for child node for given matching name * * Return: A node pointer if found, with refcount incremented, use * of_node_put() on it when done. * Returns NULL if node is not found. 
*/ struct device_node *of_get_child_by_name(const struct device_node *node, const char *name) { struct device_node *child; for_each_child_of_node(node, child) if (of_node_name_eq(child, name)) break; return child; } EXPORT_SYMBOL(of_get_child_by_name); struct device_node *__of_find_node_by_path(const struct device_node *parent, const char *path) { struct device_node *child; int len; len = strcspn(path, "/:"); if (!len) return NULL; __for_each_child_of_node(parent, child) { const char *name = kbasename(child->full_name); if (strncmp(path, name, len) == 0 && (strlen(name) == len)) return child; } return NULL; } struct device_node *__of_find_node_by_full_path(struct device_node *node, const char *path) { const char *separator = strchr(path, ':'); while (node && *path == '/') { struct device_node *tmp = node; path++; /* Increment past '/' delimiter */ node = __of_find_node_by_path(node, path); of_node_put(tmp); path = strchrnul(path, '/'); if (separator && separator < path) break; } return node; } /** * of_find_node_opts_by_path - Find a node matching a full OF path * @path: Either the full path to match, or if the path does not * start with '/', the name of a property of the /aliases * node (an alias). In the case of an alias, the node * matching the alias' value will be returned. * @opts: Address of a pointer into which to store the start of * an options string appended to the end of the path with * a ':' separator. * * Valid paths: * * /foo/bar Full path * * foo Valid alias * * foo/bar Valid alias + relative path * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; const struct property *pp; unsigned long flags; const char *separator = strchr(path, ':'); if (opts) *opts = separator ? separator + 1 : NULL; if (strcmp(path, "/") == 0) return of_node_get(of_root); /* The path could begin with an alias */ if (*path != '/') { int len; const char *p = strchrnul(path, '/'); if (separator && separator < p) p = separator; len = p - path; /* of_aliases must not be NULL */ if (!of_aliases) return NULL; for_each_property_of_node(of_aliases, pp) { if (strlen(pp->name) == len && !strncmp(pp->name, path, len)) { np = of_find_node_by_path(pp->value); break; } } if (!np) return NULL; path = p; } /* Step down the tree matching path components */ raw_spin_lock_irqsave(&devtree_lock, flags); if (!np) np = of_node_get(of_root); np = __of_find_node_by_full_path(np, path); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_opts_by_path); /** * of_find_node_by_name - Find a node by its "name" property * @from: The node to start searching from or NULL; the node * you pass will not be searched, only the next one * will. Typically, you pass what the previous call * returned. of_node_put() will be called on @from. * @name: The name string to match against * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (of_node_name_eq(np, name) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_name); /** * of_find_node_by_type - Find a node by its "device_type" property * @from: The node to start searching from, or NULL to start searching * the entire device tree. The node you pass will not be * searched, only the next one will; typically, you pass * what the previous call returned. of_node_put() will be * called on from for you. * @type: The type string to match against * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (__of_node_is_type(np, type) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_type); /** * of_find_compatible_node - Find a node based on type and one of the * tokens in its "compatible" property * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @type: The type string to match "device_type" or NULL to ignore * @compatible: The string to match to one of the tokens in the device * "compatible" list. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (__of_device_is_compatible(np, compatible, type, NULL) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_compatible_node); /** * of_find_node_with_property - Find a node which has a property with * the given name. * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @prop_name: The name of the property to look for. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ struct device_node *of_find_node_with_property(struct device_node *from, const char *prop_name) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) { if (__of_find_property(np, prop_name, NULL)) { of_node_get(np); break; } } of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_with_property); static const struct of_device_id *__of_match_node(const struct of_device_id *matches, const struct device_node *node) { const struct of_device_id *best_match = NULL; int score, best_score = 0; if (!matches) return NULL; for (; matches->name[0] || matches->type[0] || matches->compatible[0]; matches++) { score = __of_device_is_compatible(node, matches->compatible, matches->type, matches->name); if (score > best_score) { best_match = matches; best_score = score; } } return best_match; } /** * of_match_node - Tell if a device_node has a matching of_match structure * @matches: array of of device match structures to search in * @node: the of device structure to match against * * Low level utility function used by device matching. */ const struct of_device_id *of_match_node(const struct of_device_id *matches, const struct device_node *node) { const struct of_device_id *match; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); match = __of_match_node(matches, node); raw_spin_unlock_irqrestore(&devtree_lock, flags); return match; } EXPORT_SYMBOL(of_match_node); /** * of_find_matching_node_and_match - Find a node based on an of_device_id * match table. * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @matches: array of of device match structures to search in * @match: Updated to point at the matches entry which matched * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_matching_node_and_match(struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match) { struct device_node *np; const struct of_device_id *m; unsigned long flags; if (match) *match = NULL; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) { m = __of_match_node(matches, np); if (m && of_node_get(np)) { if (match) *match = m; break; } } of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_matching_node_and_match); /** * of_alias_from_compatible - Lookup appropriate alias for a device node * depending on compatible * @node: pointer to a device tree node * @alias: Pointer to buffer that alias value will be copied into * @len: Length of alias value * * Based on the value of the compatible property, this routine will attempt * to choose an appropriate alias value for a particular device tree node. * It does this by stripping the manufacturer prefix (as delimited by a ',') * from the first entry in the compatible list property. * * Note: The matching on just the "product" side of the compatible is a relic * from I2C and SPI. Please do not add any new user. * * Return: This routine returns 0 on success, <0 on failure. 
*/ int of_alias_from_compatible(const struct device_node *node, char *alias, int len) { const char *compatible, *p; int cplen; compatible = of_get_property(node, "compatible", &cplen); if (!compatible || strlen(compatible) > cplen) return -ENODEV; p = strchr(compatible, ','); strscpy(alias, p ? p + 1 : compatible, len); return 0; } EXPORT_SYMBOL_GPL(of_alias_from_compatible); /** * of_find_node_by_phandle - Find a node given a phandle * @handle: phandle of the node to find * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_phandle(phandle handle) { struct device_node *np = NULL; unsigned long flags; u32 handle_hash; if (!handle) return NULL; handle_hash = of_phandle_cache_hash(handle); raw_spin_lock_irqsave(&devtree_lock, flags); if (phandle_cache[handle_hash] && handle == phandle_cache[handle_hash]->phandle) np = phandle_cache[handle_hash]; if (!np) { for_each_of_allnodes(np) if (np->phandle == handle && !of_node_check_flag(np, OF_DETACHED)) { phandle_cache[handle_hash] = np; break; } } of_node_get(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_phandle); void of_print_phandle_args(const char *msg, const struct of_phandle_args *args) { int i; printk("%s %pOF", msg, args->np); for (i = 0; i < args->args_count; i++) { const char delim = i ? ',' : ':'; pr_cont("%c%08x", delim, args->args[i]); } pr_cont("\n"); } int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count) { const __be32 *list; int size; memset(it, 0, sizeof(*it)); /* * one of cell_count or cells_name must be provided to determine the * argument length. */ if (cell_count < 0 && !cells_name) return -EINVAL; list = of_get_property(np, list_name, &size); if (!list) return -ENOENT; it->cells_name = cells_name; it->cell_count = cell_count; it->parent = np; it->list_end = list + size / sizeof(*list); it->phandle_end = list; it->cur = list; return 0; } EXPORT_SYMBOL_GPL(of_phandle_iterator_init); int of_phandle_iterator_next(struct of_phandle_iterator *it) { uint32_t count = 0; if (it->node) { of_node_put(it->node); it->node = NULL; } if (!it->cur || it->phandle_end >= it->list_end) return -ENOENT; it->cur = it->phandle_end; /* If phandle is 0, then it is an empty entry with no arguments. */ it->phandle = be32_to_cpup(it->cur++); if (it->phandle) { /* * Find the provider node and parse the #*-cells property to * determine the argument length. 
*/ it->node = of_find_node_by_phandle(it->phandle); if (it->cells_name) { if (!it->node) { pr_err("%pOF: could not find phandle %d\n", it->parent, it->phandle); goto err; } if (of_property_read_u32(it->node, it->cells_name, &count)) { /* * If both cell_count and cells_name is given, * fall back to cell_count in absence * of the cells_name property */ if (it->cell_count >= 0) { count = it->cell_count; } else { pr_err("%pOF: could not get %s for %pOF\n", it->parent, it->cells_name, it->node); goto err; } } } else { count = it->cell_count; } /* * Make sure that the arguments actually fit in the remaining * property data length */ if (it->cur + count > it->list_end) { if (it->cells_name) pr_err("%pOF: %s = %d found %td\n", it->parent, it->cells_name, count, it->list_end - it->cur); else pr_err("%pOF: phandle %s needs %d, found %td\n", it->parent, of_node_full_name(it->node), count, it->list_end - it->cur); goto err; } } it->phandle_end = it->cur + count; it->cur_count = count; return 0; err: if (it->node) { of_node_put(it->node); it->node = NULL; } return -EINVAL; } EXPORT_SYMBOL_GPL(of_phandle_iterator_next); int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size) { int i, count; count = it->cur_count; if (WARN_ON(size < count)) count = size; for (i = 0; i < count; i++) args[i] = be32_to_cpup(it->cur++); return count; } int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args) { struct of_phandle_iterator it; int rc, cur_index = 0; if (index < 0) return -EINVAL; /* Loop over the phandles until all the requested entry is found */ of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) { /* * All of the error cases bail out of the loop, so at * this point, the parsing is successful. If the requested * index matches, then fill the out_args structure and return, * or return -ENOENT for an empty entry. */ rc = -ENOENT; if (cur_index == index) { if (!it.phandle) goto err; if (out_args) { int c; c = of_phandle_iterator_args(&it, out_args->args, MAX_PHANDLE_ARGS); out_args->np = it.node; out_args->args_count = c; } else { of_node_put(it.node); } /* Found it! return success */ return 0; } cur_index++; } /* * Unlock node before returning result; will be one of: * -ENOENT : index is for empty phandle * -EINVAL : parsing error on data */ err: of_node_put(it.node); return rc; } EXPORT_SYMBOL(__of_parse_phandle_with_args); /** * of_parse_phandle_with_args_map() - Find a node pointed by phandle in a list and remap it * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @stem_name: stem of property names that specify phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate errno * value. The difference between this function and of_parse_phandle_with_args() * is that this API remaps a phandle if the node the phandle points to has * a <@stem_name>-map property. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. 
* * Example:: * * phandle1: node1 { * #list-cells = <2>; * }; * * phandle2: node2 { * #list-cells = <1>; * }; * * phandle3: node3 { * #list-cells = <1>; * list-map = <0 &phandle2 3>, * <1 &phandle2 2>, * <2 &phandle1 5 1>; * list-map-mask = <0x3>; * }; * * node4 { * list = <&phandle1 1 2 &phandle3 0>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node4, "list", "list", 1, &args); */ int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args) { char *cells_name __free(kfree) = kasprintf(GFP_KERNEL, "#%s-cells", stem_name); char *map_name __free(kfree) = kasprintf(GFP_KERNEL, "%s-map", stem_name); char *mask_name __free(kfree) = kasprintf(GFP_KERNEL, "%s-map-mask", stem_name); char *pass_name __free(kfree) = kasprintf(GFP_KERNEL, "%s-map-pass-thru", stem_name); struct device_node *cur, *new = NULL; const __be32 *map, *mask, *pass; static const __be32 dummy_mask[] = { [0 ... (MAX_PHANDLE_ARGS - 1)] = cpu_to_be32(~0) }; static const __be32 dummy_pass[] = { [0 ... (MAX_PHANDLE_ARGS - 1)] = cpu_to_be32(0) }; __be32 initial_match_array[MAX_PHANDLE_ARGS]; const __be32 *match_array = initial_match_array; int i, ret, map_len, match; u32 list_size, new_size; if (index < 0) return -EINVAL; if (!cells_name || !map_name || !mask_name || !pass_name) return -ENOMEM; ret = __of_parse_phandle_with_args(np, list_name, cells_name, -1, index, out_args); if (ret) return ret; /* Get the #<list>-cells property */ cur = out_args->np; ret = of_property_read_u32(cur, cells_name, &list_size); if (ret < 0) goto put; /* Precalculate the match array - this simplifies match loop */ for (i = 0; i < list_size; i++) initial_match_array[i] = cpu_to_be32(out_args->args[i]); ret = -EINVAL; while (cur) { /* Get the <list>-map property */ map = of_get_property(cur, map_name, &map_len); if (!map) { return 0; } map_len /= sizeof(u32); /* Get the <list>-map-mask property (optional) */ mask = of_get_property(cur, mask_name, NULL); if (!mask) mask = dummy_mask; /* Iterate through <list>-map property */ match = 0; while (map_len > (list_size + 1) && !match) { /* Compare specifiers */ match = 1; for (i = 0; i < list_size; i++, map_len--) match &= !((match_array[i] ^ *map++) & mask[i]); of_node_put(new); new = of_find_node_by_phandle(be32_to_cpup(map)); map++; map_len--; /* Check if not found */ if (!new) { ret = -EINVAL; goto put; } if (!of_device_is_available(new)) match = 0; ret = of_property_read_u32(new, cells_name, &new_size); if (ret) goto put; /* Check for malformed properties */ if (WARN_ON(new_size > MAX_PHANDLE_ARGS) || map_len < new_size) { ret = -EINVAL; goto put; } /* Move forward by new node's #<list>-cells amount */ map += new_size; map_len -= new_size; } if (!match) { ret = -ENOENT; goto put; } /* Get the <list>-map-pass-thru property (optional) */ pass = of_get_property(cur, pass_name, NULL); if (!pass) pass = dummy_pass; /* * Successfully parsed a <list>-map translation; copy new * specifier into the out_args structure, keeping the * bits specified in <list>-map-pass-thru. 
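		 *
		 * Illustrative example (values invented): with a pass-thru
		 * cell of 0x0000000f, the low nibble of that output cell is
		 * preserved from the original specifier while the remaining
		 * bits are taken from the map entry, matching the masking
		 * done below.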
*/ for (i = 0; i < new_size; i++) { __be32 val = *(map - new_size + i); if (i < list_size) { val &= ~pass[i]; val |= cpu_to_be32(out_args->args[i]) & pass[i]; } initial_match_array[i] = val; out_args->args[i] = be32_to_cpu(val); } out_args->args_count = list_size = new_size; /* Iterate again with new provider */ out_args->np = new; of_node_put(cur); cur = new; new = NULL; } put: of_node_put(cur); of_node_put(new); return ret; } EXPORT_SYMBOL(of_parse_phandle_with_args_map); /** * of_count_phandle_with_args() - Find the number of phandles references in a property * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * * Return: The number of phandle + argument tuples within a property. It * is a typical pattern to encode a list of phandle and variable * arguments into a single property. The number of arguments is encoded * by a property in the phandle-target node. For example, a gpios * property would contain a list of GPIO specifies consisting of a * phandle and 1 or more arguments. The number of arguments are * determined by the #gpio-cells property in the node pointed to by the * phandle. */ int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) { struct of_phandle_iterator it; int rc, cur_index = 0; /* * If cells_name is NULL we assume a cell count of 0. This makes * counting the phandles trivial as each 32bit word in the list is a * phandle and no arguments are to consider. So we don't iterate through * the list but just use the length to determine the phandle count. */ if (!cells_name) { const __be32 *list; int size; list = of_get_property(np, list_name, &size); if (!list) return -ENOENT; return size / sizeof(*list); } rc = of_phandle_iterator_init(&it, np, list_name, cells_name, -1); if (rc) return rc; while ((rc = of_phandle_iterator_next(&it)) == 0) cur_index += 1; if (rc != -ENOENT) return rc; return cur_index; } EXPORT_SYMBOL(of_count_phandle_with_args); static struct property *__of_remove_property_from_list(struct property **list, struct property *prop) { struct property **next; for (next = list; *next; next = &(*next)->next) { if (*next == prop) { *next = prop->next; prop->next = NULL; return prop; } } return NULL; } /** * __of_add_property - Add a property to a node without lock operations * @np: Caller's Device Node * @prop: Property to add */ int __of_add_property(struct device_node *np, struct property *prop) { int rc = 0; unsigned long flags; struct property **next; raw_spin_lock_irqsave(&devtree_lock, flags); __of_remove_property_from_list(&np->deadprops, prop); prop->next = NULL; next = &np->properties; while (*next) { if (strcmp(prop->name, (*next)->name) == 0) { /* duplicate ! 
don't insert it */ rc = -EEXIST; goto out_unlock; } next = &(*next)->next; } *next = prop; out_unlock: raw_spin_unlock_irqrestore(&devtree_lock, flags); if (rc) return rc; __of_add_property_sysfs(np, prop); return 0; } /** * of_add_property - Add a property to a node * @np: Caller's Device Node * @prop: Property to add */ int of_add_property(struct device_node *np, struct property *prop) { int rc; mutex_lock(&of_mutex); rc = __of_add_property(np, prop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); return rc; } EXPORT_SYMBOL_GPL(of_add_property); int __of_remove_property(struct device_node *np, struct property *prop) { unsigned long flags; int rc = -ENODEV; raw_spin_lock_irqsave(&devtree_lock, flags); if (__of_remove_property_from_list(&np->properties, prop)) { /* Found the property, add it to deadprops list */ prop->next = np->deadprops; np->deadprops = prop; rc = 0; } raw_spin_unlock_irqrestore(&devtree_lock, flags); if (rc) return rc; __of_remove_property_sysfs(np, prop); return 0; } /** * of_remove_property - Remove a property from a node. * @np: Caller's Device Node * @prop: Property to remove * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. * Instead we just move the property to the "dead properties" * list, so it won't be found any more. */ int of_remove_property(struct device_node *np, struct property *prop) { int rc; if (!prop) return -ENODEV; mutex_lock(&of_mutex); rc = __of_remove_property(np, prop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); return rc; } EXPORT_SYMBOL_GPL(of_remove_property); int __of_update_property(struct device_node *np, struct property *newprop, struct property **oldpropp) { struct property **next, *oldprop; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); __of_remove_property_from_list(&np->deadprops, newprop); for (next = &np->properties; *next; next = &(*next)->next) { if (of_prop_cmp((*next)->name, newprop->name) == 0) break; } *oldpropp = oldprop = *next; if (oldprop) { /* replace the node */ newprop->next = oldprop->next; *next = newprop; oldprop->next = np->deadprops; np->deadprops = oldprop; } else { /* new node */ newprop->next = NULL; *next = newprop; } raw_spin_unlock_irqrestore(&devtree_lock, flags); __of_update_property_sysfs(np, newprop, oldprop); return 0; } /* * of_update_property - Update a property in a node, if the property does * not exist, add it. * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. 
* Instead we just move the property to the "dead properties" list, * and add the new property to the property list */ int of_update_property(struct device_node *np, struct property *newprop) { struct property *oldprop; int rc; if (!newprop->name) return -EINVAL; mutex_lock(&of_mutex); rc = __of_update_property(np, newprop, &oldprop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); return rc; } static void of_alias_add(struct alias_prop *ap, struct device_node *np, int id, const char *stem, int stem_len) { ap->np = np; ap->id = id; strscpy(ap->stem, stem, stem_len + 1); list_add_tail(&ap->link, &aliases_lookup); pr_debug("adding DT alias:%s: stem=%s id=%i node=%pOF\n", ap->alias, ap->stem, ap->id, np); } /** * of_alias_scan - Scan all properties of the 'aliases' node * @dt_alloc: An allocator that provides a virtual address to memory * for storing the resulting tree * * The function scans all the properties of the 'aliases' node and populates * the global lookup table with the properties. */ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) { const struct property *pp; of_aliases = of_find_node_by_path("/aliases"); of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); if (of_chosen) { /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = NULL; if (of_property_read_string(of_chosen, "stdout-path", &name)) of_property_read_string(of_chosen, "linux,stdout-path", &name); if (IS_ENABLED(CONFIG_PPC) && !name) of_property_read_string(of_aliases, "stdout", &name); if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT; } if (!of_aliases) return; for_each_property_of_node(of_aliases, pp) { const char *start = pp->name; const char *end = start + strlen(start); struct device_node *np; struct alias_prop *ap; int id, len; /* Skip those we do not want to proceed */ if (!strcmp(pp->name, "name") || !strcmp(pp->name, "phandle") || !strcmp(pp->name, "linux,phandle")) continue; np = of_find_node_by_path(pp->value); if (!np) continue; /* walk the alias backwards to extract the id and work out * the 'stem' string */ while (isdigit(*(end-1)) && end > start) end--; len = end - start; if (kstrtoint(end, 10, &id) < 0) continue; /* Allocate an alias_prop with enough space for the stem */ ap = dt_alloc(sizeof(*ap) + len + 1, __alignof__(*ap)); if (!ap) continue; memset(ap, 0, sizeof(*ap) + len + 1); ap->alias = start; of_alias_add(ap, np, id, start, len); } } /** * of_alias_get_id - Get alias id for the given device_node * @np: Pointer to the given device_node * @stem: Alias stem of the given device_node * * The function travels the lookup table to get the alias id for the given * device_node and alias stem. * * Return: The alias id if found. */ int of_alias_get_id(const struct device_node *np, const char *stem) { struct alias_prop *app; int id = -ENODEV; mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; if (np == app->np) { id = app->id; break; } } mutex_unlock(&of_mutex); return id; } EXPORT_SYMBOL_GPL(of_alias_get_id); /** * of_alias_get_highest_id - Get highest alias id for the given stem * @stem: Alias stem to be examined * * The function travels the lookup table to get the highest alias id for the * given alias stem. It returns the alias id if found. 
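 *
 * Illustrative example (node labels invented): given
 *
 *	aliases {
 *		serial0 = &uart0;
 *		serial2 = &uart2;
 *	};
 *
 * of_alias_get_highest_id("serial") would return 2.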
*/ int of_alias_get_highest_id(const char *stem) { struct alias_prop *app; int id = -ENODEV; mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; if (app->id > id) id = app->id; } mutex_unlock(&of_mutex); return id; } EXPORT_SYMBOL_GPL(of_alias_get_highest_id); /** * of_console_check() - Test and setup console for DT setup * @dn: Pointer to device node * @name: Name to use for preferred console without index. ex. "ttyS" * @index: Index to use for preferred console. * * Check if the given device node matches the stdout-path property in the * /chosen node. If it does then register it as the preferred console. * * Return: TRUE if console successfully setup. Otherwise return FALSE. */ bool of_console_check(const struct device_node *dn, char *name, int index) { if (!dn || dn != of_stdout || console_set_on_cmdline) return false; /* * XXX: cast `options' to char pointer to suppress complication * warnings: printk, UART and console drivers expect char pointer. */ return !add_preferred_console(name, index, (char *)of_stdout_options); } EXPORT_SYMBOL_GPL(of_console_check); /** * of_find_next_cache_node - Find a node's subsidiary cache * @np: node of type "cpu" or "cache" * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. Caller should hold a reference * to np. */ struct device_node *of_find_next_cache_node(const struct device_node *np) { struct device_node *child, *cache_node; cache_node = of_parse_phandle(np, "l2-cache", 0); if (!cache_node) cache_node = of_parse_phandle(np, "next-level-cache", 0); if (cache_node) return cache_node; /* OF on pmac has nodes instead of properties named "l2-cache" * beneath CPU nodes. */ if (IS_ENABLED(CONFIG_PPC_PMAC) && of_node_is_type(np, "cpu")) for_each_child_of_node(np, child) if (of_node_is_type(child, "cache")) return child; return NULL; } /** * of_find_last_cache_level - Find the level at which the last cache is * present for the given logical cpu * * @cpu: cpu number(logical index) for which the last cache level is needed * * Return: The level at which the last cache is present. It is exactly * same as the total number of cache levels for the given logical cpu. */ int of_find_last_cache_level(unsigned int cpu) { u32 cache_level = 0; struct device_node *prev = NULL, *np = of_cpu_device_node_get(cpu); while (np) { of_node_put(prev); prev = np; np = of_find_next_cache_node(np); } of_property_read_u32(prev, "cache-level", &cache_level); of_node_put(prev); return cache_level; } /** * of_map_id - Translate an ID through a downstream mapping. * @np: root complex device node. * @id: device ID to map. * @map_name: property name of the map to use. * @map_mask_name: optional property name of the mask to use. * @target: optional pointer to a target device node. * @id_out: optional pointer to receive the translated ID. * * Given a device ID, look up the appropriate implementation-defined * platform ID and/or the target device which receives transactions on that * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or * @id_out may be NULL if only the other is required. If @target points to * a non-NULL device node pointer, only entries targeting that node will be * matched; if it points to a NULL value, it will receive the device node of * the first matching target phandle, with a reference held. * * Return: 0 on success or a standard error code on failure. 
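 *
 * Illustrative example (values and the &its label invented): with
 *
 *	msi-map = <0x0100 &its 0x10000 0x100>;
 *
 * and no msi-map-mask, an input @id of 0x0130 falls inside [0x0100, 0x0200)
 * and is translated to 0x10000 + (0x0130 - 0x0100) = 0x10030.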
*/ int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { u32 map_mask, masked_id; int map_len; const __be32 *map = NULL; if (!np || !map_name || (!target && !id_out)) return -EINVAL; map = of_get_property(np, map_name, &map_len); if (!map) { if (target) return -ENODEV; /* Otherwise, no map implies no translation */ *id_out = id; return 0; } if (!map_len || map_len % (4 * sizeof(*map))) { pr_err("%pOF: Error: Bad %s length: %d\n", np, map_name, map_len); return -EINVAL; } /* The default is to select all bits. */ map_mask = 0xffffffff; /* * Can be overridden by "{iommu,msi}-map-mask" property. * If of_property_read_u32() fails, the default is used. */ if (map_mask_name) of_property_read_u32(np, map_mask_name, &map_mask); masked_id = map_mask & id; for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { struct device_node *phandle_node; u32 id_base = be32_to_cpup(map + 0); u32 phandle = be32_to_cpup(map + 1); u32 out_base = be32_to_cpup(map + 2); u32 id_len = be32_to_cpup(map + 3); if (id_base & ~map_mask) { pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores id-base (0x%x)\n", np, map_name, map_name, map_mask, id_base); return -EFAULT; } if (masked_id < id_base || masked_id >= id_base + id_len) continue; phandle_node = of_find_node_by_phandle(phandle); if (!phandle_node) return -ENODEV; if (target) { if (*target) of_node_put(phandle_node); else *target = phandle_node; if (*target != phandle_node) continue; } if (id_out) *id_out = masked_id - id_base + out_base; pr_debug("%pOF: %s, using mask %08x, id-base: %08x, out-base: %08x, length: %08x, id: %08x -> %08x\n", np, map_name, map_mask, id_base, out_base, id_len, id, masked_id - id_base + out_base); return 0; } pr_info("%pOF: no %s translation for id 0x%x on %pOF\n", np, map_name, id, target && *target ? *target : NULL); /* Bypasses translation */ if (id_out) *id_out = id; return 0; } EXPORT_SYMBOL_GPL(of_map_id); |
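/*
 * Illustrative usage sketch: one way a consumer could combine the phandle
 * helpers above. The property name "acme,widgets" and the cells name
 * "#acme,widget-cells" are invented for the example.
 */
static int example_walk_widgets(const struct device_node *np)
{
	struct of_phandle_args args;
	int i, count;

	count = of_count_phandle_with_args(np, "acme,widgets",
					   "#acme,widget-cells");
	if (count < 0)
		return count;

	for (i = 0; i < count; i++) {
		if (of_parse_phandle_with_args(np, "acme,widgets",
					       "#acme,widget-cells", i, &args))
			continue;
		pr_info("widget %d -> %pOF (%d argument cells)\n",
			i, args.np, args.args_count);
		of_node_put(args.np);	/* drop the reference taken for us */
	}

	return 0;
}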
// SPDX-License-Identifier: GPL-2.0-or-later /* * CALIPSO - Common Architecture Label IPv6 Security Option * * This is an implementation of the CALIPSO protocol as specified in * RFC 5570.
* * Authors: Paul Moore <paul.moore@hp.com> * Huw Davies <huw@codeweavers.com> */ /* (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015 */ #include <linux/init.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/jhash.h> #include <linux/audit.h> #include <linux/slab.h> #include <net/ip.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/netlabel.h> #include <net/calipso.h> #include <linux/atomic.h> #include <linux/bug.h> #include <linux/unaligned.h> #include <linux/crc-ccitt.h> /* Maximium size of the calipso option including * the two-byte TLV header. */ #define CALIPSO_OPT_LEN_MAX (2 + 252) /* Size of the minimum calipso option including * the two-byte TLV header. */ #define CALIPSO_HDR_LEN (2 + 8) /* Maximium size of the calipso option including * the two-byte TLV header and upto 3 bytes of * leading pad and 7 bytes of trailing pad. */ #define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7) /* Maximium size of u32 aligned buffer required to hold calipso * option. Max of 3 initial pad bytes starting from buffer + 3. * i.e. the worst case is when the previous tlv finishes on 4n + 3. */ #define CALIPSO_MAX_BUFFER (6 + CALIPSO_OPT_LEN_MAX) /* List of available DOI definitions */ static DEFINE_SPINLOCK(calipso_doi_list_lock); static LIST_HEAD(calipso_doi_list); /* Label mapping cache */ int calipso_cache_enabled = 1; int calipso_cache_bucketsize = 10; #define CALIPSO_CACHE_BUCKETBITS 7 #define CALIPSO_CACHE_BUCKETS BIT(CALIPSO_CACHE_BUCKETBITS) #define CALIPSO_CACHE_REORDERLIMIT 10 struct calipso_map_cache_bkt { spinlock_t lock; u32 size; struct list_head list; }; struct calipso_map_cache_entry { u32 hash; unsigned char *key; size_t key_len; struct netlbl_lsm_cache *lsm_data; u32 activity; struct list_head list; }; static struct calipso_map_cache_bkt *calipso_cache; static void calipso_cache_invalidate(void); static void calipso_doi_putdef(struct calipso_doi *doi_def); /* Label Mapping Cache Functions */ /** * calipso_cache_entry_free - Frees a cache entry * @entry: the entry to free * * Description: * This function frees the memory associated with a cache entry including the * LSM cache data if there are no longer any users, i.e. reference count == 0. * */ static void calipso_cache_entry_free(struct calipso_map_cache_entry *entry) { if (entry->lsm_data) netlbl_secattr_cache_free(entry->lsm_data); kfree(entry->key); kfree(entry); } /** * calipso_map_cache_hash - Hashing function for the CALIPSO cache * @key: the hash key * @key_len: the length of the key in bytes * * Description: * The CALIPSO tag hashing function. Returns a 32-bit hash value. * */ static u32 calipso_map_cache_hash(const unsigned char *key, u32 key_len) { return jhash(key, key_len, 0); } /** * calipso_cache_init - Initialize the CALIPSO cache * * Description: * Initializes the CALIPSO label mapping cache, this function should be called * before any of the other functions defined in this file. Returns zero on * success, negative values on error. 
* */ static int __init calipso_cache_init(void) { u32 iter; calipso_cache = kcalloc(CALIPSO_CACHE_BUCKETS, sizeof(struct calipso_map_cache_bkt), GFP_KERNEL); if (!calipso_cache) return -ENOMEM; for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { spin_lock_init(&calipso_cache[iter].lock); calipso_cache[iter].size = 0; INIT_LIST_HEAD(&calipso_cache[iter].list); } return 0; } /** * calipso_cache_invalidate - Invalidates the current CALIPSO cache * * Description: * Invalidates and frees any entries in the CALIPSO cache. Returns zero on * success and negative values on failure. * */ static void calipso_cache_invalidate(void) { struct calipso_map_cache_entry *entry, *tmp_entry; u32 iter; for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { spin_lock_bh(&calipso_cache[iter].lock); list_for_each_entry_safe(entry, tmp_entry, &calipso_cache[iter].list, list) { list_del(&entry->list); calipso_cache_entry_free(entry); } calipso_cache[iter].size = 0; spin_unlock_bh(&calipso_cache[iter].lock); } } /** * calipso_cache_check - Check the CALIPSO cache for a label mapping * @key: the buffer to check * @key_len: buffer length in bytes * @secattr: the security attribute struct to use * * Description: * This function checks the cache to see if a label mapping already exists for * the given key. If there is a match then the cache is adjusted and the * @secattr struct is populated with the correct LSM security attributes. The * cache is adjusted in the following manner if the entry is not already the * first in the cache bucket: * * 1. The cache entry's activity counter is incremented * 2. The previous (higher ranking) entry's activity counter is decremented * 3. If the difference between the two activity counters is geater than * CALIPSO_CACHE_REORDERLIMIT the two entries are swapped * * Returns zero on success, -ENOENT for a cache miss, and other negative values * on error. * */ static int calipso_cache_check(const unsigned char *key, u32 key_len, struct netlbl_lsm_secattr *secattr) { u32 bkt; struct calipso_map_cache_entry *entry; struct calipso_map_cache_entry *prev_entry = NULL; u32 hash; if (!calipso_cache_enabled) return -ENOENT; hash = calipso_map_cache_hash(key, key_len); bkt = hash & (CALIPSO_CACHE_BUCKETS - 1); spin_lock_bh(&calipso_cache[bkt].lock); list_for_each_entry(entry, &calipso_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CALIPSO; if (!prev_entry) { spin_unlock_bh(&calipso_cache[bkt].lock); return 0; } if (prev_entry->activity > 0) prev_entry->activity -= 1; if (entry->activity > prev_entry->activity && entry->activity - prev_entry->activity > CALIPSO_CACHE_REORDERLIMIT) { __list_del(entry->list.prev, entry->list.next); __list_add(&entry->list, prev_entry->list.prev, &prev_entry->list); } spin_unlock_bh(&calipso_cache[bkt].lock); return 0; } prev_entry = entry; } spin_unlock_bh(&calipso_cache[bkt].lock); return -ENOENT; } /** * calipso_cache_add - Add an entry to the CALIPSO cache * @calipso_ptr: the CALIPSO option * @secattr: the packet's security attributes * * Description: * Add a new entry into the CALIPSO label mapping cache. Add the new entry to * head of the cache bucket's list, if the cache bucket is out of room remove * the last entry in the list first. It is important to note that there is * currently no checking for duplicate keys. 
Returns zero on success, * negative values on failure. The key stored starts at calipso_ptr + 2, * i.e. the type and length bytes are not stored, this corresponds to * calipso_ptr[1] bytes of data. * */ static int calipso_cache_add(const unsigned char *calipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct calipso_map_cache_entry *entry = NULL; struct calipso_map_cache_entry *old_entry = NULL; u32 calipso_ptr_len; if (!calipso_cache_enabled || calipso_cache_bucketsize <= 0) return 0; calipso_ptr_len = calipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; entry->key = kmemdup(calipso_ptr + 2, calipso_ptr_len, GFP_ATOMIC); if (!entry->key) { ret_val = -ENOMEM; goto cache_add_failure; } entry->key_len = calipso_ptr_len; entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len); refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1); spin_lock_bh(&calipso_cache[bkt].lock); if (calipso_cache[bkt].size < calipso_cache_bucketsize) { list_add(&entry->list, &calipso_cache[bkt].list); calipso_cache[bkt].size += 1; } else { old_entry = list_entry(calipso_cache[bkt].list.prev, struct calipso_map_cache_entry, list); list_del(&old_entry->list); list_add(&entry->list, &calipso_cache[bkt].list); calipso_cache_entry_free(old_entry); } spin_unlock_bh(&calipso_cache[bkt].lock); return 0; cache_add_failure: if (entry) calipso_cache_entry_free(entry); return ret_val; } /* DOI List Functions */ /** * calipso_doi_search - Searches for a DOI definition * @doi: the DOI to search for * * Description: * Search the DOI definition list for a DOI definition with a DOI value that * matches @doi. The caller is responsible for calling rcu_read_[un]lock(). * Returns a pointer to the DOI definition on success and NULL on failure. */ static struct calipso_doi *calipso_doi_search(u32 doi) { struct calipso_doi *iter; list_for_each_entry_rcu(iter, &calipso_doi_list, list) if (iter->doi == doi && refcount_read(&iter->refcount)) return iter; return NULL; } /** * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine * @doi_def: the DOI structure * @audit_info: NetLabel audit information * * Description: * The caller defines a new DOI for use by the CALIPSO engine and calls this * function to add it to the list of acceptable domains. The caller must * ensure that the mapping table specified in @doi_def->map meets all of the * requirements of the mapping type (see calipso.h for details). Returns * zero on success and non-zero on failure. * */ static int calipso_doi_add(struct calipso_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; u32 doi; u32 doi_type; struct audit_buffer *audit_buf; doi = doi_def->doi; doi_type = doi_def->type; if (doi_def->doi == CALIPSO_DOI_UNKNOWN) goto doi_add_return; refcount_set(&doi_def->refcount, 1); spin_lock(&calipso_doi_list_lock); if (calipso_doi_search(doi_def->doi)) { spin_unlock(&calipso_doi_list_lock); ret_val = -EEXIST; goto doi_add_return; } list_add_tail_rcu(&doi_def->list, &calipso_doi_list); spin_unlock(&calipso_doi_list_lock); ret_val = 0; doi_add_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_ADD, audit_info); if (audit_buf) { const char *type_str; switch (doi_type) { case CALIPSO_MAP_PASS: type_str = "pass"; break; default: type_str = "(unknown)"; } audit_log_format(audit_buf, " calipso_doi=%u calipso_type=%s res=%u", doi, type_str, ret_val == 0 ? 
1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * calipso_doi_free - Frees a DOI definition * @doi_def: the DOI definition * * Description: * This function frees all of the memory associated with a DOI definition. * */ static void calipso_doi_free(struct calipso_doi *doi_def) { kfree(doi_def); } /** * calipso_doi_free_rcu - Frees a DOI definition via the RCU pointer * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that the memory allocated to the DOI definition can be released * safely. * */ static void calipso_doi_free_rcu(struct rcu_head *entry) { struct calipso_doi *doi_def; doi_def = container_of(entry, struct calipso_doi, rcu); calipso_doi_free(doi_def); } /** * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine * @doi: the DOI value * @audit_info: NetLabel audit information * * Description: * Removes a DOI definition from the CALIPSO engine. The NetLabel routines will * be called to release their own LSM domain mappings as well as our own * domain list. Returns zero on success and negative values on failure. * */ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) { int ret_val; struct calipso_doi *doi_def; struct audit_buffer *audit_buf; spin_lock(&calipso_doi_list_lock); doi_def = calipso_doi_search(doi); if (!doi_def) { spin_unlock(&calipso_doi_list_lock); ret_val = -ENOENT; goto doi_remove_return; } list_del_rcu(&doi_def->list); spin_unlock(&calipso_doi_list_lock); calipso_doi_putdef(doi_def); ret_val = 0; doi_remove_return: audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_DEL, audit_info); if (audit_buf) { audit_log_format(audit_buf, " calipso_doi=%u res=%u", doi, ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * calipso_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that * calipso_doi_putdef() is called when the caller is done. * */ static struct calipso_doi *calipso_doi_getdef(u32 doi) { struct calipso_doi *doi_def; rcu_read_lock(); doi_def = calipso_doi_search(doi); if (!doi_def) goto doi_getdef_return; if (!refcount_inc_not_zero(&doi_def->refcount)) doi_def = NULL; doi_getdef_return: rcu_read_unlock(); return doi_def; } /** * calipso_doi_putdef - Releases a reference for the given DOI definition * @doi_def: the DOI definition * * Description: * Releases a DOI definition reference obtained from calipso_doi_getdef(). * */ static void calipso_doi_putdef(struct calipso_doi *doi_def) { if (!doi_def) return; if (!refcount_dec_and_test(&doi_def->refcount)) return; calipso_cache_invalidate(); call_rcu(&doi_def->rcu, calipso_doi_free_rcu); } /** * calipso_doi_walk - Iterate through the DOI definitions * @skip_cnt: skip past this number of DOI definitions, updated * @callback: callback for each DOI definition * @cb_arg: argument for the callback function * * Description: * Iterate over the DOI definition list, skipping the first @skip_cnt entries. * For each entry call @callback, if @callback returns a negative value stop * 'walking' through the list and return. Updates the value in @skip_cnt upon * return. Returns zero on success, negative values on failure. 
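 *
 * Illustrative callback (invented for the example); it matches the @callback
 * signature and simply counts the definitions it is shown:
 *
 *	static int doi_count_cb(struct calipso_doi *doi_def, void *arg)
 *	{
 *		(*(u32 *)arg)++;
 *		return 0;
 *	}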
* */ static int calipso_doi_walk(u32 *skip_cnt, int (*callback)(struct calipso_doi *doi_def, void *arg), void *cb_arg) { int ret_val = -ENOENT; u32 doi_cnt = 0; struct calipso_doi *iter_doi; rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &calipso_doi_list, list) if (refcount_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); if (ret_val < 0) { doi_cnt--; goto doi_walk_return; } } doi_walk_return: rcu_read_unlock(); *skip_cnt = doi_cnt; return ret_val; } /** * calipso_validate - Validate a CALIPSO option * @skb: the packet * @option: the start of the option * * Description: * This routine is called to validate a CALIPSO option. * If the option is valid then %true is returned, otherwise * %false is returned. * * The caller should have already checked that the length of the * option (including the TLV header) is >= 10 and that the catmap * length is consistent with the option length. * * We leave checks on the level and categories to the socket layer. */ bool calipso_validate(const struct sk_buff *skb, const unsigned char *option) { struct calipso_doi *doi_def; bool ret_val; u16 crc, len = option[1] + 2; static const u8 zero[2]; /* The original CRC runs over the option including the TLV header * with the CRC-16 field (at offset 8) zeroed out. */ crc = crc_ccitt(0xffff, option, 8); crc = crc_ccitt(crc, zero, sizeof(zero)); if (len > 10) crc = crc_ccitt(crc, option + 10, len - 10); crc = ~crc; if (option[8] != (crc & 0xff) || option[9] != ((crc >> 8) & 0xff)) return false; rcu_read_lock(); doi_def = calipso_doi_search(get_unaligned_be32(option + 2)); ret_val = !!doi_def; rcu_read_unlock(); return ret_val; } /** * calipso_map_cat_hton - Perform a category mapping from host to network * @doi_def: the DOI definition * @secattr: the security attributes * @net_cat: the zero'd out category bitmap in network/CALIPSO format * @net_cat_len: the length of the CALIPSO bitmap in bytes * * Description: * Perform a label mapping to translate a local MLS category bitmap to the * correct CALIPSO bitmap using the given DOI definition. Returns the minimum * size in bytes of the network bitmap on success, negative values otherwise. * */ static int calipso_map_cat_hton(const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr, unsigned char *net_cat, u32 net_cat_len) { int spot = -1; u32 net_spot_max = 0; u32 net_clen_bits = net_cat_len * 8; for (;;) { spot = netlbl_catmap_walk(secattr->attr.mls.cat, spot + 1); if (spot < 0) break; if (spot >= net_clen_bits) return -ENOSPC; netlbl_bitmap_setbit(net_cat, spot, 1); if (spot > net_spot_max) net_spot_max = spot; } return (net_spot_max / 32 + 1) * 4; } /** * calipso_map_cat_ntoh - Perform a category mapping from network to host * @doi_def: the DOI definition * @net_cat: the category bitmap in network/CALIPSO format * @net_cat_len: the length of the CALIPSO bitmap in bytes * @secattr: the security attributes * * Description: * Perform a label mapping to translate a CALIPSO bitmap to the correct local * MLS category bitmap using the given DOI definition. Returns zero on * success, negative values on failure. 
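 *
 * As implemented here the mapping is one-to-one: bit N set in @net_cat
 * results in category N being set in @secattr->attr.mls.cat.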
* */ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def, const unsigned char *net_cat, u32 net_cat_len, struct netlbl_lsm_secattr *secattr) { int ret_val; int spot = -1; u32 net_clen_bits = net_cat_len * 8; for (;;) { spot = netlbl_bitmap_walk(net_cat, net_clen_bits, spot + 1, 1); if (spot < 0) return 0; ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, spot, GFP_ATOMIC); if (ret_val != 0) return ret_val; } return -EINVAL; } /** * calipso_pad_write - Writes pad bytes in TLV format * @buf: the buffer * @offset: offset from start of buffer to write padding * @count: number of pad bytes to write * * Description: * Write @count bytes of TLV padding into @buffer starting at offset @offset. * @count should be less than 8 - see RFC 4942. * */ static int calipso_pad_write(unsigned char *buf, unsigned int offset, unsigned int count) { if (WARN_ON_ONCE(count >= 8)) return -EINVAL; switch (count) { case 0: break; case 1: buf[offset] = IPV6_TLV_PAD1; break; default: buf[offset] = IPV6_TLV_PADN; buf[offset + 1] = count - 2; if (count > 2) memset(buf + offset + 2, 0, count - 2); break; } return 0; } /** * calipso_genopt - Generate a CALIPSO option * @buf: the option buffer * @start: offset from which to write * @buf_len: the size of opt_buf * @doi_def: the CALIPSO DOI to use * @secattr: the security attributes * * Description: * Generate a CALIPSO option using the DOI definition and security attributes * passed to the function. This also generates upto three bytes of leading * padding that ensures that the option is 4n + 2 aligned. It returns the * number of bytes written (including any initial padding). */ static int calipso_genopt(unsigned char *buf, u32 start, u32 buf_len, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; u32 len, pad; u16 crc; static const unsigned char padding[4] = {2, 1, 0, 3}; unsigned char *calipso; /* CALIPSO has 4n + 2 alignment */ pad = padding[start & 3]; if (buf_len <= start + pad + CALIPSO_HDR_LEN) return -ENOSPC; if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) return -EPERM; len = CALIPSO_HDR_LEN; if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { ret_val = calipso_map_cat_hton(doi_def, secattr, buf + start + pad + len, buf_len - start - pad - len); if (ret_val < 0) return ret_val; len += ret_val; } calipso_pad_write(buf, start, pad); calipso = buf + start + pad; calipso[0] = IPV6_TLV_CALIPSO; calipso[1] = len - 2; *(__be32 *)(calipso + 2) = htonl(doi_def->doi); calipso[6] = (len - CALIPSO_HDR_LEN) / 4; calipso[7] = secattr->attr.mls.lvl; crc = ~crc_ccitt(0xffff, calipso, len); calipso[8] = crc & 0xff; calipso[9] = (crc >> 8) & 0xff; return pad + len; } /* Hop-by-hop hdr helper functions */ /** * calipso_opt_update - Replaces socket's hop options with a new set * @sk: the socket * @hop: new hop options * * Description: * Replaces @sk's hop options with @hop. @hop may be NULL to leave * the socket with no hop options. 
* */ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop) { struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts; txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop); txopt_put(old); if (IS_ERR(txopts)) return PTR_ERR(txopts); txopts = ipv6_update_options(sk, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } return 0; } /** * calipso_tlv_len - Returns the length of the TLV * @opt: the option header * @offset: offset of the TLV within the header * * Description: * Returns the length of the TLV option at offset @offset within * the option header @opt. Checks that the entire TLV fits inside * the option header, returns a negative value if this is not the case. */ static int calipso_tlv_len(struct ipv6_opt_hdr *opt, unsigned int offset) { unsigned char *tlv = (unsigned char *)opt; unsigned int opt_len = ipv6_optlen(opt), tlv_len; if (offset < sizeof(*opt) || offset >= opt_len) return -EINVAL; if (tlv[offset] == IPV6_TLV_PAD1) return 1; if (offset + 1 >= opt_len) return -EINVAL; tlv_len = tlv[offset + 1] + 2; if (offset + tlv_len > opt_len) return -EINVAL; return tlv_len; } /** * calipso_opt_find - Finds the CALIPSO option in an IPv6 hop options header * @hop: the hop options header * @start: on return holds the offset of any leading padding * @end: on return holds the offset of the first non-pad TLV after CALIPSO * * Description: * Finds the space occupied by a CALIPSO option (including any leading and * trailing padding). * * If a CALIPSO option exists set @start and @end to the * offsets within @hop of the start of padding before the first * CALIPSO option and the end of padding after the first CALIPSO * option. In this case the function returns 0. * * In the absence of a CALIPSO option, @start and @end will be * set to the start and end of any trailing padding in the header. * This is useful when appending a new option, as the caller may want * to overwrite some of this padding. In this case the function will * return -ENOENT. */ static int calipso_opt_find(struct ipv6_opt_hdr *hop, unsigned int *start, unsigned int *end) { int ret_val = -ENOENT, tlv_len; unsigned int opt_len, offset, offset_s = 0, offset_e = 0; unsigned char *opt = (unsigned char *)hop; opt_len = ipv6_optlen(hop); offset = sizeof(*hop); while (offset < opt_len) { tlv_len = calipso_tlv_len(hop, offset); if (tlv_len < 0) return tlv_len; switch (opt[offset]) { case IPV6_TLV_PAD1: case IPV6_TLV_PADN: if (offset_e) offset_e = offset; break; case IPV6_TLV_CALIPSO: ret_val = 0; offset_e = offset; break; default: if (offset_e == 0) offset_s = offset; else goto out; } offset += tlv_len; } out: if (offset_s) *start = offset_s + calipso_tlv_len(hop, offset_s); else *start = sizeof(*hop); if (offset_e) *end = offset_e + calipso_tlv_len(hop, offset_e); else *end = opt_len; return ret_val; } /** * calipso_opt_insert - Inserts a CALIPSO option into an IPv6 hop opt hdr * @hop: the original hop options header * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Creates a new hop options header based on @hop with a * CALIPSO option added to it. If @hop already contains a CALIPSO * option this is overwritten, otherwise the new option is appended * after any existing options. If @hop is NULL then the new header * will contain just the CALIPSO option and any needed padding. 
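 *
 * The resulting header is laid out as: the TLVs of @hop up to @start, the
 * (re)generated CALIPSO option with its leading padding, trailing padding
 * chosen so that the TLVs copied from @end onwards keep their original
 * alignment, and finally those remaining TLVs.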
* */ static struct ipv6_opt_hdr * calipso_opt_insert(struct ipv6_opt_hdr *hop, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { unsigned int start, end, buf_len, pad, hop_len; struct ipv6_opt_hdr *new; int ret_val; if (hop) { hop_len = ipv6_optlen(hop); ret_val = calipso_opt_find(hop, &start, &end); if (ret_val && ret_val != -ENOENT) return ERR_PTR(ret_val); } else { hop_len = 0; start = sizeof(*hop); end = 0; } buf_len = hop_len + start - end + CALIPSO_OPT_LEN_MAX_WITH_PAD; new = kzalloc(buf_len, GFP_ATOMIC); if (!new) return ERR_PTR(-ENOMEM); if (start > sizeof(*hop)) memcpy(new, hop, start); ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def, secattr); if (ret_val < 0) { kfree(new); return ERR_PTR(ret_val); } buf_len = start + ret_val; /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */ pad = ((buf_len & 4) + (end & 7)) & 7; calipso_pad_write((unsigned char *)new, buf_len, pad); buf_len += pad; if (end != hop_len) { memcpy((char *)new + buf_len, (char *)hop + end, hop_len - end); buf_len += hop_len - end; } new->nexthdr = 0; new->hdrlen = buf_len / 8 - 1; return new; } /** * calipso_opt_del - Removes the CALIPSO option from an option header * @hop: the original header * @new: the new header * * Description: * Creates a new header based on @hop without any CALIPSO option. If @hop * doesn't contain a CALIPSO option it returns -ENOENT. If @hop contains * no other non-padding options, it returns zero with @new set to NULL. * Otherwise it returns zero, creates a new header without the CALIPSO * option (and removing as much padding as possible) and returns with * @new set to that header. * */ static int calipso_opt_del(struct ipv6_opt_hdr *hop, struct ipv6_opt_hdr **new) { int ret_val; unsigned int start, end, delta, pad, hop_len; ret_val = calipso_opt_find(hop, &start, &end); if (ret_val) return ret_val; hop_len = ipv6_optlen(hop); if (start == sizeof(*hop) && end == hop_len) { /* There's no other option in the header so return NULL */ *new = NULL; return 0; } delta = (end - start) & ~7; *new = kzalloc(hop_len - delta, GFP_ATOMIC); if (!*new) return -ENOMEM; memcpy(*new, hop, start); (*new)->hdrlen -= delta / 8; pad = (end - start) & 7; calipso_pad_write((unsigned char *)*new, start, pad); if (end != hop_len) memcpy((char *)*new + start + pad, (char *)hop + end, hop_len - end); return 0; } /** * calipso_opt_getattr - Get the security attributes from a memory block * @calipso: the CALIPSO option * @secattr: the security attributes * * Description: * Inspect @calipso and return the security attributes in @secattr. * Returns zero on success and negative values on failure. 
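 *
 * For reference, @calipso points at the option laid out as written by
 * calipso_genopt() above:
 *
 *	calipso[0]	IPV6_TLV_CALIPSO type
 *	calipso[1]	option data length (2-byte TLV header excluded)
 *	calipso[2..5]	DOI, big endian
 *	calipso[6]	category bitmap length in 4-byte words
 *	calipso[7]	sensitivity level
 *	calipso[8..9]	CRC-16 checksum
 *	calipso[10..]	category bitmap (calipso[6] * 4 bytes)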
* */ static int calipso_opt_getattr(const unsigned char *calipso, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi, len = calipso[1], cat_len = calipso[6] * 4; struct calipso_doi *doi_def; if (cat_len + 8 > len) return -EINVAL; if (calipso_cache_check(calipso + 2, calipso[1], secattr) == 0) return 0; doi = get_unaligned_be32(calipso + 2); rcu_read_lock(); doi_def = calipso_doi_search(doi); if (!doi_def) goto getattr_return; secattr->attr.mls.lvl = calipso[7]; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (cat_len) { ret_val = calipso_map_cat_ntoh(doi_def, calipso + 10, cat_len, secattr); if (ret_val != 0) { netlbl_catmap_free(secattr->attr.mls.cat); goto getattr_return; } if (secattr->attr.mls.cat) secattr->flags |= NETLBL_SECATTR_MLS_CAT; } secattr->type = NETLBL_NLTYPE_CALIPSO; getattr_return: rcu_read_unlock(); return ret_val; } /* sock functions. */ /** * calipso_sock_getattr - Get the security attributes from a sock * @sk: the sock * @secattr: the security attributes * * Description: * Query @sk to see if there is a CALIPSO option attached to the sock and if * there is return the CALIPSO security attributes in @secattr. This function * requires that @sk be locked, or privately held, but it does not do any * locking itself. Returns zero on success and negative values on failure. * */ static int calipso_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { struct ipv6_opt_hdr *hop; int opt_len, len, ret_val = -ENOMSG, offset; unsigned char *opt; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); if (!txopts || !txopts->hopopt) goto done; hop = txopts->hopopt; opt = (unsigned char *)hop; opt_len = ipv6_optlen(hop); offset = sizeof(*hop); while (offset < opt_len) { len = calipso_tlv_len(hop, offset); if (len < 0) { ret_val = len; goto done; } switch (opt[offset]) { case IPV6_TLV_CALIPSO: if (len < CALIPSO_HDR_LEN) ret_val = -EINVAL; else ret_val = calipso_opt_getattr(&opt[offset], secattr); goto done; default: offset += len; break; } } done: txopt_put(txopts); return ret_val; } /** * calipso_sock_setattr - Add a CALIPSO option to a socket * @sk: the socket * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CALIPSO option on the given socket using the DOI definition and * security attributes passed to the function. This function requires * exclusive access to @sk, which means it either needs to be in the * process of being created or locked. Returns zero on success and negative * values on failure. * */ static int calipso_sock_setattr(struct sock *sk, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; struct ipv6_opt_hdr *old, *new; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); old = NULL; if (txopts) old = txopts->hopopt; new = calipso_opt_insert(old, doi_def, secattr); txopt_put(txopts); if (IS_ERR(new)) return PTR_ERR(new); ret_val = calipso_opt_update(sk, new); kfree(new); return ret_val; } /** * calipso_sock_delattr - Delete the CALIPSO option from a socket * @sk: the socket * * Description: * Removes the CALIPSO option from a socket, if present. * */ static void calipso_sock_delattr(struct sock *sk) { struct ipv6_opt_hdr *new_hop; struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); if (!txopts || !txopts->hopopt) goto done; if (calipso_opt_del(txopts->hopopt, &new_hop)) goto done; calipso_opt_update(sk, new_hop); kfree(new_hop); done: txopt_put(txopts); } /* request sock functions. 
*/ /** * calipso_req_setattr - Add a CALIPSO option to a connection request socket * @req: the connection request socket * @doi_def: the CALIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CALIPSO option on the given socket using the DOI definition and * security attributes passed to the function. Returns zero on success and * negative values on failure. * */ static int calipso_req_setattr(struct request_sock *req, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { struct ipv6_txoptions *txopts; struct inet_request_sock *req_inet = inet_rsk(req); struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else old = NULL; new = calipso_opt_insert(old, doi_def, secattr); if (IS_ERR(new)) return PTR_ERR(new); txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); kfree(new); if (IS_ERR(txopts)) return PTR_ERR(txopts); txopts = xchg(&req_inet->ipv6_opt, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } return 0; } /** * calipso_req_delattr - Delete the CALIPSO option from a request socket * @req: the request socket * * Description: * Removes the CALIPSO option from a request socket, if present. * */ static void calipso_req_delattr(struct request_sock *req) { struct inet_request_sock *req_inet = inet_rsk(req); struct ipv6_opt_hdr *new; struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new)) return; /* Nothing to do */ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new); if (!IS_ERR(txopts)) { txopts = xchg(&req_inet->ipv6_opt, txopts); if (txopts) { atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); txopt_put(txopts); } } kfree(new); } /* skbuff functions. */ /** * calipso_skbuff_optptr - Find the CALIPSO option in the packet * @skb: the packet * * Description: * Parse the packet's IP header looking for a CALIPSO option. Returns a pointer * to the start of the CALIPSO option on success, NULL if one if not found. * */ static unsigned char *calipso_skbuff_optptr(const struct sk_buff *skb) { const struct ipv6hdr *ip6_hdr = ipv6_hdr(skb); int offset; if (ip6_hdr->nexthdr != NEXTHDR_HOP) return NULL; offset = ipv6_find_tlv(skb, sizeof(*ip6_hdr), IPV6_TLV_CALIPSO); if (offset >= 0) return (unsigned char *)ip6_hdr + offset; return NULL; } /** * calipso_skbuff_setattr - Set the CALIPSO option on a packet * @skb: the packet * @doi_def: the CALIPSO DOI to use * @secattr: the security attributes * * Description: * Set the CALIPSO option on the given packet based on the security attributes. * Returns a pointer to the IP header on success and NULL on failure. 
* */ static int calipso_skbuff_setattr(struct sk_buff *skb, const struct calipso_doi *doi_def, const struct netlbl_lsm_secattr *secattr) { int ret_val; struct ipv6hdr *ip6_hdr; struct ipv6_opt_hdr *hop; unsigned char buf[CALIPSO_MAX_BUFFER]; int len_delta, new_end, pad, payload; unsigned int start, end; ip6_hdr = ipv6_hdr(skb); if (ip6_hdr->nexthdr == NEXTHDR_HOP) { hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); ret_val = calipso_opt_find(hop, &start, &end); if (ret_val && ret_val != -ENOENT) return ret_val; } else { start = 0; end = 0; } memset(buf, 0, sizeof(buf)); ret_val = calipso_genopt(buf, start & 3, sizeof(buf), doi_def, secattr); if (ret_val < 0) return ret_val; new_end = start + ret_val; /* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */ pad = ((new_end & 4) + (end & 7)) & 7; len_delta = new_end - (int)end + pad; ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); if (ret_val < 0) return ret_val; ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */ if (len_delta) { if (len_delta > 0) skb_push(skb, len_delta); else skb_pull(skb, -len_delta); memmove((char *)ip6_hdr - len_delta, ip6_hdr, sizeof(*ip6_hdr) + start); skb_reset_network_header(skb); ip6_hdr = ipv6_hdr(skb); payload = ntohs(ip6_hdr->payload_len); ip6_hdr->payload_len = htons(payload + len_delta); } hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); if (start == 0) { struct ipv6_opt_hdr *new_hop = (struct ipv6_opt_hdr *)buf; new_hop->nexthdr = ip6_hdr->nexthdr; new_hop->hdrlen = len_delta / 8 - 1; ip6_hdr->nexthdr = NEXTHDR_HOP; } else { hop->hdrlen += len_delta / 8; } memcpy((char *)hop + start, buf + (start & 3), new_end - start); calipso_pad_write((unsigned char *)hop, new_end, pad); return 0; } /** * calipso_skbuff_delattr - Delete any CALIPSO options from a packet * @skb: the packet * * Description: * Removes any and all CALIPSO options from the given packet. Returns zero on * success, negative values on failure. * */ static int calipso_skbuff_delattr(struct sk_buff *skb) { int ret_val; struct ipv6hdr *ip6_hdr; struct ipv6_opt_hdr *old_hop; u32 old_hop_len, start = 0, end = 0, delta, size, pad; if (!calipso_skbuff_optptr(skb)) return 0; /* since we are changing the packet we should make a copy */ ret_val = skb_cow(skb, skb_headroom(skb)); if (ret_val < 0) return ret_val; ip6_hdr = ipv6_hdr(skb); old_hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); old_hop_len = ipv6_optlen(old_hop); ret_val = calipso_opt_find(old_hop, &start, &end); if (ret_val) return ret_val; if (start == sizeof(*old_hop) && end == old_hop_len) { /* There's no other option in the header so we delete * the whole thing. 
*/ delta = old_hop_len; size = sizeof(*ip6_hdr); ip6_hdr->nexthdr = old_hop->nexthdr; } else { delta = (end - start) & ~7; if (delta) old_hop->hdrlen -= delta / 8; pad = (end - start) & 7; size = sizeof(*ip6_hdr) + start + pad; calipso_pad_write((unsigned char *)old_hop, start, pad); } if (delta) { skb_pull(skb, delta); memmove((char *)ip6_hdr + delta, ip6_hdr, size); skb_reset_network_header(skb); } return 0; } static const struct netlbl_calipso_ops ops = { .doi_add = calipso_doi_add, .doi_free = calipso_doi_free, .doi_remove = calipso_doi_remove, .doi_getdef = calipso_doi_getdef, .doi_putdef = calipso_doi_putdef, .doi_walk = calipso_doi_walk, .sock_getattr = calipso_sock_getattr, .sock_setattr = calipso_sock_setattr, .sock_delattr = calipso_sock_delattr, .req_setattr = calipso_req_setattr, .req_delattr = calipso_req_delattr, .opt_getattr = calipso_opt_getattr, .skbuff_optptr = calipso_skbuff_optptr, .skbuff_setattr = calipso_skbuff_setattr, .skbuff_delattr = calipso_skbuff_delattr, .cache_invalidate = calipso_cache_invalidate, .cache_add = calipso_cache_add }; /** * calipso_init - Initialize the CALIPSO module * * Description: * Initialize the CALIPSO module and prepare it for use. Returns zero on * success and negative values on failure. * */ int __init calipso_init(void) { int ret_val; ret_val = calipso_cache_init(); if (!ret_val) netlbl_calipso_ops_register(&ops); return ret_val; } void calipso_exit(void) { netlbl_calipso_ops_register(NULL); calipso_cache_invalidate(); kfree(calipso_cache); } |
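/*
 * Editor's note: a minimal sketch, not part of calipso.c. The insert and
 * delete paths above keep the hop-by-hop option area a multiple of 8 octets
 * by writing Pad1 (a single 0x00 octet) or PadN (type 0x01, a length octet,
 * then zeroes), per RFC 8200 section 4.2. The helper below is a hypothetical
 * user-space illustration of that rule for a gap of 0..7 octets; it is not
 * the kernel's calipso_pad_write().
 */
#include <string.h>

static void example_pad_write(unsigned char *buf, unsigned int offset,
			      unsigned int count)
{
	if (count == 0)
		return;				/* already 8-octet aligned */
	if (count == 1) {
		buf[offset] = 0x00;		/* Pad1 option */
	} else {
		buf[offset] = 0x01;		/* PadN option type */
		buf[offset + 1] = count - 2;	/* PadN data length */
		memset(buf + offset + 2, 0, count - 2);
	}
}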
853 854 | // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 International Business Machines Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * This module provides the abstraction for an SCTP transport representing * a remote transport address. For local transport addresses, we just use * union sctp_addr. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@us.ibm.com> * Xingang Guo <xingang.guo@intel.com> * Hui Huang <hui.huang@nokia.com> * Sridhar Samudrala <sri@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/types.h> #include <linux/random.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ static struct sctp_transport *sctp_transport_init(struct net *net, struct sctp_transport *peer, const union sctp_addr *addr, gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len); memset(&peer->saddr, 0, sizeof(union sctp_addr)); peer->sack_generation = 0; /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = 0; peer->last_time_ecne_reduced = jiffies; peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = net->sctp.max_retrans_path; peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0); timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0); timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0); timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0); timer_setup(&peer->proto_unreach_timer, sctp_generate_proto_unreach_event, 0); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); return peer; } /* Allocate and initialize a new transport. */ struct sctp_transport *sctp_transport_new(struct net *net, const union sctp_addr *addr, gfp_t gfp) { struct sctp_transport *transport; transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; SCTP_DBG_OBJCNT_INC(transport); return transport; fail_init: kfree(transport); fail: return NULL; } /* This transport is no longer needed. Free up if possible, or * delay until it last reference count. */ void sctp_transport_free(struct sctp_transport *transport) { /* Try to delete the heartbeat timer. */ if (del_timer(&transport->hb_timer)) sctp_transport_put(transport); /* Delete the T3_rtx timer if it's active. 
* There is no point in not doing this now and letting * structure hang around in memory since we know * the transport is going away. */ if (del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); if (del_timer(&transport->reconf_timer)) sctp_transport_put(transport); if (del_timer(&transport->probe_timer)) sctp_transport_put(transport); /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) sctp_transport_put(transport); sctp_transport_put(transport); } static void sctp_transport_destroy_rcu(struct rcu_head *head) { struct sctp_transport *transport; transport = container_of(head, struct sctp_transport, rcu); dst_release(transport->dst); kfree(transport); SCTP_DBG_OBJCNT_DEC(transport); } /* Destroy the transport data structure. * Assumes there are no more users of this structure. */ static void sctp_transport_destroy(struct sctp_transport *transport) { if (unlikely(refcount_read(&transport->refcnt))) { WARN(1, "Attempt to destroy undead transport %p!\n", transport); return; } sctp_packet_free(&transport->packet); if (transport->asoc) sctp_association_put(transport->asoc); call_rcu(&transport->rcu, sctp_transport_destroy_rcu); } /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ void sctp_transport_reset_t3_rtx(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * * R1) Every time a DATA chunk is sent to any address(including a * retransmission), if the T3-rtx timer of that address is not running * start it running so that it will expire after the RTO of that * address. */ if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); } void sctp_transport_reset_hb_timer(struct sctp_transport *transport) { unsigned long expires; /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); if (!mod_timer(&transport->hb_timer, expires + get_random_u32_below(transport->rto))) sctp_transport_hold(transport); } void sctp_transport_reset_reconf_timer(struct sctp_transport *transport) { if (!timer_pending(&transport->reconf_timer)) if (!mod_timer(&transport->reconf_timer, jiffies + transport->rto)) sctp_transport_hold(transport); } void sctp_transport_reset_probe_timer(struct sctp_transport *transport) { if (!mod_timer(&transport->probe_timer, jiffies + transport->probe_interval)) sctp_transport_hold(transport); } void sctp_transport_reset_raise_timer(struct sctp_transport *transport) { if (!mod_timer(&transport->probe_timer, jiffies + transport->probe_interval * 30)) sctp_transport_hold(transport); } /* This transport has been assigned to an association. * Initialize fields from the association or from the sock itself. * Register the reference count in the association. */ void sctp_transport_set_owner(struct sctp_transport *transport, struct sctp_association *asoc) { transport->asoc = asoc; sctp_association_hold(asoc); } /* Initialize the pmtu of a transport. 
*/ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ if (!transport->dst || transport->dst->obsolete) { sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); } if (transport->param_flags & SPP_PMTUD_DISABLE) { struct sctp_association *asoc = transport->asoc; if (!transport->pathmtu && asoc && asoc->pathmtu) transport->pathmtu = asoc->pathmtu; if (transport->pathmtu) return; } if (transport->dst) transport->pathmtu = sctp_dst_mtu(transport->dst); else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; sctp_transport_pl_update(transport); } void sctp_transport_pl_send(struct sctp_transport *t) { if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } } else if (t->pl.state == SCTP_PL_SEARCH) { if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ t->pl.state = SCTP_PL_BASE; /* Search -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } else { /* Normal probe failure. */ t->pl.probe_high = t->pl.probe_size; t->pl.probe_size = t->pl.pmtu; } } else if (t->pl.state == SCTP_PL_COMPLETE) { if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } } out: pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); t->pl.probe_count++; } bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */ t->pl.probe_size += SCTP_PL_BIG_STEP; } else if (t->pl.state == SCTP_PL_ERROR) { t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */ t->pl.pmtu = t->pl.probe_size; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); t->pl.probe_size += SCTP_PL_BIG_STEP; } else if (t->pl.state == SCTP_PL_SEARCH) { if (!t->pl.probe_high) { if (t->pl.probe_size < SCTP_MAX_PLPMTU) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); return false; } t->pl.probe_high = SCTP_MAX_PLPMTU; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { t->pl.probe_high = 0; t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */ t->pl.probe_size = t->pl.pmtu; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); sctp_transport_reset_raise_timer(t); } } else if (t->pl.state == SCTP_PL_COMPLETE) { /* Raise probe_size again after 30 * interval in Search Complete */ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU); } return t->pl.state == SCTP_PL_COMPLETE; } static 
bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu); if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size) return false; if (t->pl.state == SCTP_PL_BASE) { if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) { t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); return true; } } else if (t->pl.state == SCTP_PL_SEARCH) { if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { t->pl.state = SCTP_PL_BASE; /* Search -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_count = 0; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); return true; } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) { t->pl.probe_size = pmtu; t->pl.probe_count = 0; } } else if (t->pl.state == SCTP_PL_COMPLETE) { if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { t->pl.state = SCTP_PL_BASE; /* Complete -> Base */ t->pl.probe_size = SCTP_BASE_PLPMTU; t->pl.probe_count = 0; t->pl.probe_high = 0; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_transport_reset_probe_timer(t); return true; } } return false; } bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct sock *sk = t->asoc->base.sk; struct dst_entry *dst; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment instead */ pmtu = SCTP_DEFAULT_MINSEGMENT; } pmtu = SCTP_TRUNC4(pmtu); if (sctp_transport_pl_enabled(t)) return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t)); dst = sctp_transport_dst_check(t); if (dst) { struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); union sctp_addr addr; pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); } if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); dst = t->dst; } if (dst) { /* Re-fetch, as under layers may have a higher minimum size */ pmtu = sctp_dst_mtu(dst); change = t->pathmtu != pmtu; } t->pathmtu = pmtu; return change; } /* Caches the dst entry and source address for a transport's destination * address. */ void sctp_transport_route(struct sctp_transport *transport, union sctp_addr *saddr, struct sctp_sock *opt) { struct sctp_association *asoc = transport->asoc; struct sctp_af *af = transport->af_specific; sctp_transport_dst_release(transport); af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt)); if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else af->get_saddr(opt, transport, &transport->fl); sctp_transport_pmtu(transport, sctp_opt2sk(opt)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). */ if (transport->dst && asoc && (!asoc->peer.primary_path || transport == asoc->peer.active_path)) opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk); } /* Hold a reference to a transport. */ int sctp_transport_hold(struct sctp_transport *transport) { return refcount_inc_not_zero(&transport->refcnt); } /* Release a reference to a transport and clean up * if there are no more references. 
*/ void sctp_transport_put(struct sctp_transport *transport) { if (refcount_dec_and_test(&transport->refcnt)) sctp_transport_destroy(transport); } /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { if (unlikely(!tp->rto_pending)) /* We should not be doing any RTO updates unless rto_pending is set. */ pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' */ /* Note: The above algorithm has been rewritten to * express rto_beta and rto_alpha as inverse powers * of two. * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. */ tp->srtt = rtt; tp->rttvar = rtt >> 1; } /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY. */ if (tp->rttvar == 0) tp->rttvar = SCTP_CLOCK_GRANULARITY; /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */ tp->rto = tp->srtt + (tp->rttvar << 2); /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min * seconds then it is rounded up to RTO.Min seconds. */ if (tp->rto < tp->asoc->rto_min) tp->rto = tp->asoc->rto_min; /* 6.3.1 C7) A maximum value may be placed on RTO provided it is * at least RTO.max seconds. */ if (tp->rto > tp->asoc->rto_max) tp->rto = tp->asoc->rto_max; sctp_max_rto(tp->asoc, tp); tp->rtt = rtt; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. */ tp->rto_pending = 0; pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n", __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto); } /* This routine updates the transport's cwnd and partial_bytes_acked * parameters based on the bytes acked in the received SACK. */ void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ if (asoc->fast_recovery && TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) asoc->fast_recovery = 0; ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { /* RFC 4960 7.2.1 * o When cwnd is less than or equal to ssthresh, an SCTP * endpoint MUST use the slow-start algorithm to increase * cwnd only if the current congestion window is being fully * utilized, an incoming SACK advances the Cumulative TSN * Ack Point, and the data sender is not in Fast Recovery. * Only when these three conditions are met can the cwnd be * increased; otherwise, the cwnd MUST not be increased. * If these conditions are met, then cwnd MUST be increased * by, at most, the lesser of 1) the total size of the * previously outstanding DATA chunk(s) acknowledged, and * 2) the destination's path MTU. 
This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. */ if (asoc->fast_recovery) return; /* The appropriate cwnd increase algorithm is performed * if, and only if the congestion window is being fully * utilized. Note that RFC4960 Errata 3.22 removed the * other condition on ctsn moving. */ if (flight_size < cwnd) return; if (bytes_acked > pmtu) cwnd += pmtu; else cwnd += bytes_acked; pr_debug("%s: slow start: transport:%p, bytes_acked:%d, " "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n", __func__, transport, bytes_acked, cwnd, ssthresh, flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, * upon each SACK arrival, increase partial_bytes_acked * by the total number of bytes of all new chunks * acknowledged in that SACK including chunks * acknowledged by the new Cumulative TSN Ack and by Gap * Ack Blocks. (updated by RFC4960 Errata 3.22) * * When partial_bytes_acked is greater than cwnd and * before the arrival of the SACK the sender had less * bytes of data outstanding than cwnd (i.e., before * arrival of the SACK, flightsize was less than cwnd), * reset partial_bytes_acked to cwnd. (RFC 4960 Errata * 3.26) * * When partial_bytes_acked is equal to or greater than * cwnd and before the arrival of the SACK the sender * had cwnd or more bytes of data outstanding (i.e., * before arrival of the SACK, flightsize was greater * than or equal to cwnd), partial_bytes_acked is reset * to (partial_bytes_acked - cwnd). Next, cwnd is * increased by MTU. (RFC 4960 Errata 3.12) */ pba += bytes_acked; if (pba > cwnd && flight_size < cwnd) pba = cwnd; if (pba >= cwnd && flight_size >= cwnd) { pba = pba - cwnd; cwnd += pmtu; } pr_debug("%s: congestion avoidance: transport:%p, " "bytes_acked:%d, cwnd:%d, ssthresh:%d, " "flight_size:%d, pba:%d\n", __func__, transport, bytes_acked, cwnd, ssthresh, flight_size, pba); } transport->cwnd = cwnd; transport->partial_bytes_acked = pba; } /* This routine is used to lower the transport's cwnd when congestion is * detected. */ void sctp_transport_lower_cwnd(struct sctp_transport *transport, enum sctp_lower_cwnd reason) { struct sctp_association *asoc = transport->asoc; switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide * When the T3-rtx timer expires on an address, SCTP should * perform slow start by: * ssthresh = max(cwnd/2, 4*MTU) * cwnd = 1*MTU * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = asoc->pathmtu; /* T3-rtx also clears fast recovery */ asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the * destination address(es) to which the missing DATA chunks * were last sent, according to the formula described in * Section 7.2.3. * * RFC 2960 7.2.3, sctpimpguide Upon detection of packet * losses from SACK (see Section 7.2.4), An endpoint * should do the following: * ssthresh = max(cwnd/2, 4*MTU) * cwnd = ssthresh * partial_bytes_acked = 0 */ if (asoc->fast_recovery) return; /* Mark Fast recovery */ asoc->fast_recovery = 1; asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; case SCTP_LOWER_CWND_ECNE: /* RFC 2481 Section 6.1.2. * If the sender receives an ECN-Echo ACK packet * then the sender knows that congestion was encountered in the * network on the path from the sender to the receiver. 
The * indication of congestion should be treated just as a * congestion loss in non-ECN Capable TCP. That is, the TCP * source halves the congestion window "cwnd" and reduces the * slow start threshold "ssthresh". * A critical condition is that TCP does not react to * congestion indications more than once every window of * data (or more loosely more than once every round-trip time). */ if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } break; case SCTP_LOWER_CWND_INACTIVE: /* RFC 2960 Section 7.2.1, sctpimpguide * When the endpoint does not transmit data on a given * transport address, the cwnd of the transport address * should be adjusted to max(cwnd/2, 4*MTU) per RTO. * NOTE: Although the draft recommends that this check needs * to be done every RTO interval, we do it every hearbeat * interval. */ transport->cwnd = max(transport->cwnd/2, 4*asoc->pathmtu); /* RFC 4960 Errata 3.27.2: also adjust sshthresh */ transport->ssthresh = transport->cwnd; break; } transport->partial_bytes_acked = 0; pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n", __func__, transport, reason, transport->cwnd, transport->ssthresh); } /* Apply Max.Burst limit to the congestion window: * sctpimpguide-05 2.14.2 * D) When the time comes for the sender to * transmit new DATA chunks, the protocol parameter Max.Burst MUST * first be applied to limit how many new DATA chunks may be sent. * The limit is applied by adjusting cwnd as follows: * if ((flightsize+ Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ void sctp_transport_burst_limited(struct sctp_transport *t) { struct sctp_association *asoc = t->asoc; u32 old_cwnd = t->cwnd; u32 max_burst_bytes; if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); if (max_burst_bytes < old_cwnd) { t->cwnd = max_burst_bytes; t->burst_limited = old_cwnd; } } /* Restore the old cwnd congestion window, after the burst had it's * desired effect. */ void sctp_transport_burst_reset(struct sctp_transport *t) { if (t->burst_limited) { t->cwnd = t->burst_limited; t->burst_limited = 0; } } /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *trans) { /* RTO + timer slack +/- 50% of RTO */ unsigned long timeout = trans->rto >> 1; if (trans->state != SCTP_UNCONFIRMED && trans->state != SCTP_PF) timeout += trans->hbinterval; return max_t(unsigned long, timeout, HZ / 5); } /* Reset transport variables to their initial values */ void sctp_transport_reset(struct sctp_transport *t) { struct sctp_association *asoc = t->asoc; /* RFC 2960 (bis), Section 5.2.4 * All the congestion control parameters (e.g., cwnd, ssthresh) * related to this peer MUST be reset to their initial values * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->rto = asoc->rto_initial; sctp_max_rto(asoc, t); t->rtt = 0; t->srtt = 0; t->rttvar = 0; /* Reset these additional variables so that we have a clean slate. 
*/ t->partial_bytes_acked = 0; t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; t->cacc.cycling_changeover = 0; t->cacc.next_tsn_at_change = 0; t->cacc.cacc_saw_newack = 0; } /* Schedule retransmission on the given transport */ void sctp_transport_immediate_rtx(struct sctp_transport *t) { /* Stop pending T3_rtx_timer */ if (del_timer(&t->T3_rtx_timer)) sctp_transport_put(t); sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); if (!timer_pending(&t->T3_rtx_timer)) { if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) sctp_transport_hold(t); } } /* Drop dst */ void sctp_transport_dst_release(struct sctp_transport *t) { dst_release(t->dst); t->dst = NULL; t->dst_pending_confirm = 0; } /* Schedule neighbour confirm */ void sctp_transport_dst_confirm(struct sctp_transport *t) { t->dst_pending_confirm = 1; } |
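/*
 * Editor's note: a stand-alone sketch, not part of transport.c, of the
 * RFC 4960 6.3.1 smoothing performed by sctp_transport_update_rto() above.
 * RTO.Alpha = 1/8 and RTO.Beta = 1/4 are written as right shifts (3 and 2),
 * matching the default net->sctp.rto_alpha/rto_beta; the struct and function
 * names are hypothetical, and the RTO.Min/RTO.Max clamping plus the jiffies
 * clock granularity are omitted for brevity.
 */
#include <stdint.h>

struct example_rto_state {
	uint32_t srtt;		/* smoothed round-trip time */
	uint32_t rttvar;	/* round-trip time variation */
	uint32_t rto;		/* resulting retransmission timeout */
};

static void example_update_rto(struct example_rto_state *s, uint32_t rtt)
{
	if (s->srtt || s->rttvar) {
		/* C3: RTTVAR <- 3/4*RTTVAR + 1/4*|SRTT - R'|, SRTT <- 7/8*SRTT + 1/8*R' */
		uint32_t err = s->srtt > rtt ? s->srtt - rtt : rtt - s->srtt;

		s->rttvar = s->rttvar - (s->rttvar >> 2) + (err >> 2);
		s->srtt = s->srtt - (s->srtt >> 3) + (rtt >> 3);
	} else {
		/* C2: first measurement: SRTT <- R, RTTVAR <- R/2 */
		s->srtt = rtt;
		s->rttvar = rtt >> 1;
	}
	/* G1: keep RTTVAR above zero (stands in for SCTP_CLOCK_GRANULARITY) */
	if (s->rttvar == 0)
		s->rttvar = 1;
	/* C3: RTO <- SRTT + 4 * RTTVAR */
	s->rto = s->srtt + (s->rttvar << 2);
}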
// SPDX-License-Identifier: GPL-2.0-or-later
/* * Realtek RTL2830 DVB-T demodulator driver * * Copyright (C) 2011 Antti Palosaari <crope@iki.fi> */ #include "rtl2830_priv.h" /* Our regmap is bypassing I2C adapter lock, thus we do it! */ static int rtl2830_bulk_write(struct i2c_client *client, unsigned int reg, const void *val, size_t val_count) { struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; i2c_lock_bus(client->adapter, I2C_LOCK_SEGMENT); ret = regmap_bulk_write(dev->regmap, reg, val, val_count); i2c_unlock_bus(client->adapter, I2C_LOCK_SEGMENT); return ret; } static int rtl2830_update_bits(struct i2c_client *client, unsigned int reg, unsigned int mask, unsigned int val) { struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; i2c_lock_bus(client->adapter, I2C_LOCK_SEGMENT); ret = regmap_update_bits(dev->regmap, reg, mask, val); i2c_unlock_bus(client->adapter, I2C_LOCK_SEGMENT); return ret; } static int rtl2830_bulk_read(struct i2c_client *client, unsigned int reg, void *val, size_t val_count) { struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; i2c_lock_bus(client->adapter, I2C_LOCK_SEGMENT); ret = regmap_bulk_read(dev->regmap, reg, val, val_count); i2c_unlock_bus(client->adapter, I2C_LOCK_SEGMENT); return ret; } static int rtl2830_init(struct dvb_frontend *fe) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); struct dtv_frontend_properties *c = &dev->fe.dtv_property_cache; int ret, i; struct rtl2830_reg_val_mask tab[] = { {0x00d, 0x01, 0x03}, {0x00d, 0x10, 0x10}, {0x104, 0x00, 0x1e}, {0x105, 0x80, 0x80}, {0x110, 0x02, 0x03}, {0x110, 0x08, 0x0c}, {0x17b, 0x00, 0x40}, {0x17d, 0x05, 0x0f}, {0x17d, 0x50, 0xf0}, {0x18c, 0x08, 0x0f}, {0x18d, 0x00, 0xc0}, {0x188, 0x05, 0x0f}, {0x189, 0x00, 0xfc}, {0x2d5, 0x02, 0x02}, {0x2f1, 0x02, 0x06}, {0x2f1, 0x20, 0xf8}, {0x16d, 0x00, 0x01}, {0x1a6, 0x00, 0x80}, {0x106, dev->pdata->vtop, 0x3f}, {0x107, dev->pdata->krf, 0x3f}, {0x112, 0x28, 0xff}, {0x103, dev->pdata->agc_targ_val, 0xff}, {0x00a, 0x02, 0x07}, {0x140, 0x0c, 0x3c}, {0x140, 0x40, 0xc0}, {0x15b, 0x05, 0x07}, {0x15b, 0x28, 0x38}, {0x15c, 0x05, 0x07}, {0x15c, 0x28, 0x38}, {0x115, dev->pdata->spec_inv, 0x01}, {0x16f, 0x01, 0x07}, {0x170, 0x18, 0x38}, {0x172, 0x0f, 0x0f}, {0x173, 0x08, 0x38}, {0x175, 0x01, 0x07}, {0x176, 0x00, 0xc0}, }; for (i = 0; i < ARRAY_SIZE(tab); i++) { ret = rtl2830_update_bits(client, tab[i].reg, tab[i].mask, tab[i].val); if (ret) goto err; } ret = rtl2830_bulk_write(client, 0x18f, "\x28\x00", 2); if (ret) goto err; ret = rtl2830_bulk_write(client, 0x195, "\x04\x06\x0a\x12\x0a\x12\x1e\x28", 8); if (ret) goto err; /* TODO: spec init */ /* soft reset */ ret = rtl2830_update_bits(client, 0x101, 0x04, 0x04); if (ret) goto err; ret = rtl2830_update_bits(client, 0x101, 0x04, 0x00); if (ret) goto err; /* init stats here in order signal app which stats are supported */ c->strength.len = 1; c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->cnr.len = 1; c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_error.len = 1; c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_count.len = 1; c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; dev->sleeping = false; return ret; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static int rtl2830_sleep(struct dvb_frontend *fe) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); dev->sleeping = true; dev->fe_status = 0; return 0; } static int rtl2830_get_tune_settings(struct dvb_frontend *fe, 
struct dvb_frontend_tune_settings *s) { s->min_delay_ms = 500; s->step_size = fe->ops.info.frequency_stepsize_hz * 2; s->max_drift = (fe->ops.info.frequency_stepsize_hz * 2) + 1; return 0; } static int rtl2830_set_frontend(struct dvb_frontend *fe) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; u64 num; u8 buf[3], u8tmp; u32 if_ctl, if_frequency; static const u8 bw_params1[3][34] = { { 0x1f, 0xf0, 0x1f, 0xf0, 0x1f, 0xfa, 0x00, 0x17, 0x00, 0x41, 0x00, 0x64, 0x00, 0x67, 0x00, 0x38, 0x1f, 0xde, 0x1f, 0x7a, 0x1f, 0x47, 0x1f, 0x7c, 0x00, 0x30, 0x01, 0x4b, 0x02, 0x82, 0x03, 0x73, 0x03, 0xcf, /* 6 MHz */ }, { 0x1f, 0xfa, 0x1f, 0xda, 0x1f, 0xc1, 0x1f, 0xb3, 0x1f, 0xca, 0x00, 0x07, 0x00, 0x4d, 0x00, 0x6d, 0x00, 0x40, 0x1f, 0xca, 0x1f, 0x4d, 0x1f, 0x2a, 0x1f, 0xb2, 0x00, 0xec, 0x02, 0x7e, 0x03, 0xd0, 0x04, 0x53, /* 7 MHz */ }, { 0x00, 0x10, 0x00, 0x0e, 0x1f, 0xf7, 0x1f, 0xc9, 0x1f, 0xa0, 0x1f, 0xa6, 0x1f, 0xec, 0x00, 0x4e, 0x00, 0x7d, 0x00, 0x3a, 0x1f, 0x98, 0x1f, 0x10, 0x1f, 0x40, 0x00, 0x75, 0x02, 0x5f, 0x04, 0x24, 0x04, 0xdb, /* 8 MHz */ }, }; static const u8 bw_params2[3][6] = { {0xc3, 0x0c, 0x44, 0x33, 0x33, 0x30}, /* 6 MHz */ {0xb8, 0xe3, 0x93, 0x99, 0x99, 0x98}, /* 7 MHz */ {0xae, 0xba, 0xf3, 0x26, 0x66, 0x64}, /* 8 MHz */ }; dev_dbg(&client->dev, "frequency=%u bandwidth_hz=%u inversion=%u\n", c->frequency, c->bandwidth_hz, c->inversion); /* program tuner */ if (fe->ops.tuner_ops.set_params) fe->ops.tuner_ops.set_params(fe); switch (c->bandwidth_hz) { case 6000000: i = 0; break; case 7000000: i = 1; break; case 8000000: i = 2; break; default: dev_err(&client->dev, "invalid bandwidth_hz %u\n", c->bandwidth_hz); return -EINVAL; } ret = rtl2830_update_bits(client, 0x008, 0x06, i << 1); if (ret) goto err; /* program if frequency */ if (fe->ops.tuner_ops.get_if_frequency) ret = fe->ops.tuner_ops.get_if_frequency(fe, &if_frequency); else ret = -EINVAL; if (ret) goto err; num = if_frequency % dev->pdata->clk; num *= 0x400000; num = div_u64(num, dev->pdata->clk); num = -num; if_ctl = num & 0x3fffff; dev_dbg(&client->dev, "if_frequency=%d if_ctl=%08x\n", if_frequency, if_ctl); buf[0] = (if_ctl >> 16) & 0x3f; buf[1] = (if_ctl >> 8) & 0xff; buf[2] = (if_ctl >> 0) & 0xff; ret = rtl2830_bulk_read(client, 0x119, &u8tmp, 1); if (ret) goto err; buf[0] |= u8tmp & 0xc0; /* [7:6] */ ret = rtl2830_bulk_write(client, 0x119, buf, 3); if (ret) goto err; /* 1/2 split I2C write */ ret = rtl2830_bulk_write(client, 0x11c, &bw_params1[i][0], 17); if (ret) goto err; /* 2/2 split I2C write */ ret = rtl2830_bulk_write(client, 0x12d, &bw_params1[i][17], 17); if (ret) goto err; ret = rtl2830_bulk_write(client, 0x19d, bw_params2[i], 6); if (ret) goto err; return ret; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static int rtl2830_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; u8 buf[3]; if (dev->sleeping) return 0; ret = rtl2830_bulk_read(client, 0x33c, buf, 2); if (ret) goto err; ret = rtl2830_bulk_read(client, 0x351, &buf[2], 1); if (ret) goto err; dev_dbg(&client->dev, "TPS=%*ph\n", 3, buf); switch ((buf[0] >> 2) & 3) { case 0: c->modulation = QPSK; break; case 1: c->modulation = QAM_16; break; case 2: c->modulation = QAM_64; break; } switch ((buf[2] >> 2) & 1) { case 0: c->transmission_mode = TRANSMISSION_MODE_2K; break; case 1: 
c->transmission_mode = TRANSMISSION_MODE_8K; } switch ((buf[2] >> 0) & 3) { case 0: c->guard_interval = GUARD_INTERVAL_1_32; break; case 1: c->guard_interval = GUARD_INTERVAL_1_16; break; case 2: c->guard_interval = GUARD_INTERVAL_1_8; break; case 3: c->guard_interval = GUARD_INTERVAL_1_4; break; } switch ((buf[0] >> 4) & 7) { case 0: c->hierarchy = HIERARCHY_NONE; break; case 1: c->hierarchy = HIERARCHY_1; break; case 2: c->hierarchy = HIERARCHY_2; break; case 3: c->hierarchy = HIERARCHY_4; break; } switch ((buf[1] >> 3) & 7) { case 0: c->code_rate_HP = FEC_1_2; break; case 1: c->code_rate_HP = FEC_2_3; break; case 2: c->code_rate_HP = FEC_3_4; break; case 3: c->code_rate_HP = FEC_5_6; break; case 4: c->code_rate_HP = FEC_7_8; break; } switch ((buf[1] >> 0) & 7) { case 0: c->code_rate_LP = FEC_1_2; break; case 1: c->code_rate_LP = FEC_2_3; break; case 2: c->code_rate_LP = FEC_3_4; break; case 3: c->code_rate_LP = FEC_5_6; break; case 4: c->code_rate_LP = FEC_7_8; break; } return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static int rtl2830_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); struct dtv_frontend_properties *c = &dev->fe.dtv_property_cache; int ret, stmp; unsigned int utmp; u8 u8tmp, buf[2]; *status = 0; if (dev->sleeping) return 0; ret = rtl2830_bulk_read(client, 0x351, &u8tmp, 1); if (ret) goto err; u8tmp = (u8tmp >> 3) & 0x0f; /* [6:3] */ if (u8tmp == 11) { *status |= FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; } else if (u8tmp == 10) { *status |= FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI; } dev->fe_status = *status; /* Signal strength */ if (dev->fe_status & FE_HAS_SIGNAL) { /* Read IF AGC */ ret = rtl2830_bulk_read(client, 0x359, buf, 2); if (ret) goto err; stmp = buf[0] << 8 | buf[1] << 0; stmp = sign_extend32(stmp, 13); utmp = clamp_val(-4 * stmp + 32767, 0x0000, 0xffff); dev_dbg(&client->dev, "IF AGC=%d\n", stmp); c->strength.stat[0].scale = FE_SCALE_RELATIVE; c->strength.stat[0].uvalue = utmp; } else { c->strength.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } /* CNR */ if (dev->fe_status & FE_HAS_VITERBI) { unsigned int hierarchy, constellation; #define CONSTELLATION_NUM 3 #define HIERARCHY_NUM 4 static const u32 constant[CONSTELLATION_NUM][HIERARCHY_NUM] = { {70705899, 70705899, 70705899, 70705899}, {82433173, 82433173, 87483115, 94445660}, {92888734, 92888734, 95487525, 99770748}, }; ret = rtl2830_bulk_read(client, 0x33c, &u8tmp, 1); if (ret) goto err; constellation = (u8tmp >> 2) & 0x03; /* [3:2] */ if (constellation > CONSTELLATION_NUM - 1) goto err; hierarchy = (u8tmp >> 4) & 0x07; /* [6:4] */ if (hierarchy > HIERARCHY_NUM - 1) goto err; ret = rtl2830_bulk_read(client, 0x40c, buf, 2); if (ret) goto err; utmp = buf[0] << 8 | buf[1] << 0; if (utmp) stmp = (constant[constellation][hierarchy] - intlog10(utmp)) / ((1 << 24) / 10000); else stmp = 0; dev_dbg(&client->dev, "CNR raw=%u\n", utmp); c->cnr.stat[0].scale = FE_SCALE_DECIBEL; c->cnr.stat[0].svalue = stmp; } else { c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } /* BER */ if (dev->fe_status & FE_HAS_LOCK) { ret = rtl2830_bulk_read(client, 0x34e, buf, 2); if (ret) goto err; utmp = buf[0] << 8 | buf[1] << 0; dev->post_bit_error += utmp; dev->post_bit_count += 1000000; dev_dbg(&client->dev, "BER errors=%u total=1000000\n", utmp); c->post_bit_error.stat[0].scale = FE_SCALE_COUNTER; c->post_bit_error.stat[0].uvalue = dev->post_bit_error; 
c->post_bit_count.stat[0].scale = FE_SCALE_COUNTER; c->post_bit_count.stat[0].uvalue = dev->post_bit_count; } else { c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; } return ret; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static int rtl2830_read_snr(struct dvb_frontend *fe, u16 *snr) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; if (c->cnr.stat[0].scale == FE_SCALE_DECIBEL) *snr = div_s64(c->cnr.stat[0].svalue, 100); else *snr = 0; return 0; } static int rtl2830_read_ber(struct dvb_frontend *fe, u32 *ber) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); *ber = (dev->post_bit_error - dev->post_bit_error_prev); dev->post_bit_error_prev = dev->post_bit_error; return 0; } static int rtl2830_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { *ucblocks = 0; return 0; } static int rtl2830_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; if (c->strength.stat[0].scale == FE_SCALE_RELATIVE) *strength = c->strength.stat[0].uvalue; else *strength = 0; return 0; } static const struct dvb_frontend_ops rtl2830_ops = { .delsys = {SYS_DVBT}, .info = { .name = "Realtek RTL2830 (DVB-T)", .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO | FE_CAN_RECOVER | FE_CAN_MUTE_TS }, .init = rtl2830_init, .sleep = rtl2830_sleep, .get_tune_settings = rtl2830_get_tune_settings, .set_frontend = rtl2830_set_frontend, .get_frontend = rtl2830_get_frontend, .read_status = rtl2830_read_status, .read_snr = rtl2830_read_snr, .read_ber = rtl2830_read_ber, .read_ucblocks = rtl2830_read_ucblocks, .read_signal_strength = rtl2830_read_signal_strength, }; static int rtl2830_pid_filter_ctrl(struct dvb_frontend *fe, int onoff) { struct i2c_client *client = fe->demodulator_priv; int ret; u8 u8tmp; dev_dbg(&client->dev, "onoff=%d\n", onoff); /* enable / disable PID filter */ if (onoff) u8tmp = 0x80; else u8tmp = 0x00; ret = rtl2830_update_bits(client, 0x061, 0x80, u8tmp); if (ret) goto err; return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static int rtl2830_pid_filter(struct dvb_frontend *fe, u8 index, u16 pid, int onoff) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; u8 buf[4]; dev_dbg(&client->dev, "index=%d pid=%04x onoff=%d\n", index, pid, onoff); /* skip invalid PIDs (0x2000) */ if (pid > 0x1fff || index >= 32) return 0; if (onoff) set_bit(index, &dev->filters); else clear_bit(index, &dev->filters); /* enable / disable PIDs */ buf[0] = (dev->filters >> 0) & 0xff; buf[1] = (dev->filters >> 8) & 0xff; buf[2] = (dev->filters >> 16) & 0xff; buf[3] = (dev->filters >> 24) & 0xff; ret = rtl2830_bulk_write(client, 0x062, buf, 4); if (ret) goto err; /* add PID */ buf[0] = (pid >> 8) & 0xff; buf[1] = (pid >> 0) & 0xff; ret = rtl2830_bulk_write(client, 0x066 + 2 * index, buf, 2); if (ret) goto err; return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } /* * I2C gate/mux/repeater logic * We must use unlocked __i2c_transfer() here (through regmap) because of I2C * adapter lock is already taken by tuner driver. * Gate is closed automatically after single I2C transfer. 
*/ static int rtl2830_select(struct i2c_mux_core *muxc, u32 chan_id) { struct i2c_client *client = i2c_mux_priv(muxc); struct rtl2830_dev *dev = i2c_get_clientdata(client); int ret; dev_dbg(&client->dev, "\n"); /* open I2C repeater for 1 transfer, closes automatically */ /* XXX: regmap_update_bits() does not lock I2C adapter */ ret = regmap_update_bits(dev->regmap, 0x101, 0x08, 0x08); if (ret) goto err; return 0; err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static struct dvb_frontend *rtl2830_get_dvb_frontend(struct i2c_client *client) { struct rtl2830_dev *dev = i2c_get_clientdata(client); dev_dbg(&client->dev, "\n"); return &dev->fe; } static struct i2c_adapter *rtl2830_get_i2c_adapter(struct i2c_client *client) { struct rtl2830_dev *dev = i2c_get_clientdata(client); dev_dbg(&client->dev, "\n"); return dev->muxc->adapter[0]; } /* * We implement own I2C access routines for regmap in order to get manual access * to I2C adapter lock, which is needed for I2C mux adapter. */ static int rtl2830_regmap_read(void *context, const void *reg_buf, size_t reg_size, void *val_buf, size_t val_size) { struct i2c_client *client = context; int ret; struct i2c_msg msg[2] = { { .addr = client->addr, .flags = 0, .len = reg_size, .buf = (u8 *)reg_buf, }, { .addr = client->addr, .flags = I2C_M_RD, .len = val_size, .buf = val_buf, } }; ret = __i2c_transfer(client->adapter, msg, 2); if (ret != 2) { dev_warn(&client->dev, "i2c reg read failed %d\n", ret); if (ret >= 0) ret = -EREMOTEIO; return ret; } return 0; } static int rtl2830_regmap_write(void *context, const void *data, size_t count) { struct i2c_client *client = context; int ret; struct i2c_msg msg[1] = { { .addr = client->addr, .flags = 0, .len = count, .buf = (u8 *)data, } }; ret = __i2c_transfer(client->adapter, msg, 1); if (ret != 1) { dev_warn(&client->dev, "i2c reg write failed %d\n", ret); if (ret >= 0) ret = -EREMOTEIO; return ret; } return 0; } static int rtl2830_regmap_gather_write(void *context, const void *reg, size_t reg_len, const void *val, size_t val_len) { struct i2c_client *client = context; int ret; u8 buf[256]; struct i2c_msg msg[1] = { { .addr = client->addr, .flags = 0, .len = 1 + val_len, .buf = buf, } }; buf[0] = *(u8 const *)reg; memcpy(&buf[1], val, val_len); ret = __i2c_transfer(client->adapter, msg, 1); if (ret != 1) { dev_warn(&client->dev, "i2c reg write failed %d\n", ret); if (ret >= 0) ret = -EREMOTEIO; return ret; } return 0; } static int rtl2830_probe(struct i2c_client *client) { struct rtl2830_platform_data *pdata = client->dev.platform_data; struct rtl2830_dev *dev; int ret; u8 u8tmp; static const struct regmap_bus regmap_bus = { .read = rtl2830_regmap_read, .write = rtl2830_regmap_write, .gather_write = rtl2830_regmap_gather_write, .val_format_endian_default = REGMAP_ENDIAN_NATIVE, }; static const struct regmap_range_cfg regmap_range_cfg[] = { { .selector_reg = 0x00, .selector_mask = 0xff, .selector_shift = 0, .window_start = 0, .window_len = 0x100, .range_min = 0 * 0x100, .range_max = 5 * 0x100, }, }; static const struct regmap_config regmap_config = { .reg_bits = 8, .val_bits = 8, .max_register = 5 * 0x100, .ranges = regmap_range_cfg, .num_ranges = ARRAY_SIZE(regmap_range_cfg), }; dev_dbg(&client->dev, "\n"); if (pdata == NULL) { ret = -EINVAL; goto err; } /* allocate memory for the internal state */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) { ret = -ENOMEM; goto err; } /* setup the state */ i2c_set_clientdata(client, dev); dev->client = client; dev->pdata = client->dev.platform_data; 
dev->sleeping = true; dev->regmap = regmap_init(&client->dev, ®map_bus, client, ®map_config); if (IS_ERR(dev->regmap)) { ret = PTR_ERR(dev->regmap); goto err_kfree; } /* check if the demod is there */ ret = rtl2830_bulk_read(client, 0x000, &u8tmp, 1); if (ret) goto err_regmap_exit; /* create muxed i2c adapter for tuner */ dev->muxc = i2c_mux_alloc(client->adapter, &client->dev, 1, 0, 0, rtl2830_select, NULL); if (!dev->muxc) { ret = -ENOMEM; goto err_regmap_exit; } dev->muxc->priv = client; ret = i2c_mux_add_adapter(dev->muxc, 0, 0); if (ret) goto err_regmap_exit; /* create dvb frontend */ memcpy(&dev->fe.ops, &rtl2830_ops, sizeof(dev->fe.ops)); dev->fe.demodulator_priv = client; /* setup callbacks */ pdata->get_dvb_frontend = rtl2830_get_dvb_frontend; pdata->get_i2c_adapter = rtl2830_get_i2c_adapter; pdata->pid_filter = rtl2830_pid_filter; pdata->pid_filter_ctrl = rtl2830_pid_filter_ctrl; dev_info(&client->dev, "Realtek RTL2830 successfully attached\n"); return 0; err_regmap_exit: regmap_exit(dev->regmap); err_kfree: kfree(dev); err: dev_dbg(&client->dev, "failed=%d\n", ret); return ret; } static void rtl2830_remove(struct i2c_client *client) { struct rtl2830_dev *dev = i2c_get_clientdata(client); dev_dbg(&client->dev, "\n"); i2c_mux_del_adapters(dev->muxc); regmap_exit(dev->regmap); kfree(dev); } static const struct i2c_device_id rtl2830_id_table[] = { { "rtl2830" }, {} }; MODULE_DEVICE_TABLE(i2c, rtl2830_id_table); static struct i2c_driver rtl2830_driver = { .driver = { .name = "rtl2830", .suppress_bind_attrs = true, }, .probe = rtl2830_probe, .remove = rtl2830_remove, .id_table = rtl2830_id_table, }; module_i2c_driver(rtl2830_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Realtek RTL2830 DVB-T demodulator driver"); MODULE_LICENSE("GPL"); |
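/*
 * Editor's note: a host-side sketch, not part of the driver, of the IF
 * control word arithmetic in rtl2830_set_frontend() above: the IF remainder
 * is scaled by 2^22, divided by the demodulator clock, negated, and masked
 * to 22 bits. The function name and the sample values (36.15 MHz IF,
 * 28.8 MHz clock) are assumptions used only for illustration.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t example_if_ctl(uint32_t if_frequency, uint32_t clk)
{
	uint64_t num = if_frequency % clk;

	num *= 0x400000;	/* scale the remainder to a 22-bit fraction */
	num /= clk;
	return (uint32_t)(-num) & 0x3fffff;	/* negate, keep 22 bits */
}

int main(void)
{
	printf("if_ctl = 0x%06x\n", example_if_ctl(36150000, 28800000));
	return 0;
}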
/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_TRACEPOINT_H #define _LINUX_TRACEPOINT_H /* * Kernel Tracepoint API. * * See Documentation/trace/tracepoints.rst. * * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> * * Heavily inspired from the Linux Kernel Markers.
*/ #include <linux/smp.h> #include <linux/srcu.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/rcupdate_trace.h> #include <linux/tracepoint-defs.h> #include <linux/static_call.h> struct module; struct tracepoint; struct notifier_block; struct trace_eval_map { const char *system; const char *eval_string; unsigned long eval_value; }; #define TRACEPOINT_DEFAULT_PRIO 10 extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, int prio); extern int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data, int prio); extern int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); static inline int tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe, void *data) { return tracepoint_probe_register_prio_may_exist(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); } extern void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), void *priv); #ifdef CONFIG_MODULES struct tp_module { struct list_head list; struct module *mod; }; bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); void for_each_module_tracepoint(void (*fct)(struct tracepoint *, struct module *, void *), void *priv); void for_each_tracepoint_in_module(struct module *, void (*fct)(struct tracepoint *, struct module *, void *), void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { return false; } static inline int register_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } static inline int unregister_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } static inline void for_each_module_tracepoint(void (*fct)(struct tracepoint *, struct module *, void *), void *priv) { } static inline void for_each_tracepoint_in_module(struct module *mod, void (*fct)(struct tracepoint *, struct module *, void *), void *priv) { } #endif /* CONFIG_MODULES */ /* * tracepoint_synchronize_unregister must be called between the last tracepoint * probe unregistration and the end of module exit to make sure there is no * caller executing a probe when it is freed. * * An alternative is to use the following for batch reclaim associated * with a given tracepoint: * * - tracepoint_is_faultable() == false: call_rcu() * - tracepoint_is_faultable() == true: call_rcu_tasks_trace() */ #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { synchronize_rcu_tasks_trace(); synchronize_rcu(); } static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } #else static inline void tracepoint_synchronize_unregister(void) { } static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ #ifndef PARAMS #define PARAMS(args...) 
args #endif #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { return offset_to_ptr(p); } #define __TRACEPOINT_ENTRY(name) \ asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ " .balign 4 \n" \ " .long __tracepoint_" #name " - . \n" \ " .previous \n") #else static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { return *p; } #define __TRACEPOINT_ENTRY(name) \ static tracepoint_ptr_t __tracepoint_ptr_##name __used \ __section("__tracepoints_ptrs") = &__tracepoint_##name #endif #endif /* _LINUX_TRACEPOINT_H */ /* * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include * file ifdef protection. * This is due to the way trace events work. If a file includes two * trace event headers under one "CREATE_TRACE_POINTS" the first include * will override the TRACE_EVENT and break the second include. */ #ifndef DECLARE_TRACE #define TP_PROTO(args...) args #define TP_ARGS(args...) args #define TP_CONDITION(args...) args /* * Individual subsystem my have a separate configuration to * enable their tracepoints. By default, this file will create * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created * it can define NOTRACE before including the tracepoint headers. */ #if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE) #define TRACEPOINTS_ENABLED #endif #ifdef TRACEPOINTS_ENABLED #ifdef CONFIG_HAVE_STATIC_CALL #define __DO_TRACE_CALL(name, args) \ do { \ struct tracepoint_func *it_func_ptr; \ void *__data; \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ __data = (it_func_ptr)->data; \ static_call(tp_func_##name)(__data, args); \ } \ } while (0) #else #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ /* * Declare an exported function that Rust code can call to trigger this * tracepoint. This function does not include the static branch; that is done * in Rust to avoid a function call when the tracepoint is disabled. */ #define DEFINE_RUST_DO_TRACE(name, proto, args) #define __DEFINE_RUST_DO_TRACE(name, proto, args) \ notrace void rust_do_trace_##name(proto) \ { \ __do_trace_##name(args); \ } /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. * * When lockdep is enabled, we make sure to always test if RCU is * "watching" regardless if the tracepoint is enabled or not. Tracepoints * require RCU to be active, and it should always warn at the tracepoint * site if it is not watching, as it will need to be active when the * tracepoint is enabled. 
*/ #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ extern void rust_do_trace_##name(proto); \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_register(&__tracepoint_##name, \ (void *)probe, data); \ } \ static inline int \ register_trace_prio_##name(void (*probe)(data_proto), void *data,\ int prio) \ { \ return tracepoint_probe_register_prio(&__tracepoint_##name, \ (void *)probe, data, prio); \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_unregister(&__tracepoint_##name,\ (void *)probe, data); \ } \ static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } \ static inline bool \ trace_##name##_enabled(void) \ { \ return static_branch_unlikely(&__tracepoint_##name.key);\ } #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ if (cond) { \ guard(preempt_notrace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ } \ static inline void trace_##name(proto) \ { \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ static inline void trace_##name(proto) \ { \ might_fault(); \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. * * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. 
*/ #define __DEFINE_TRACE_EXT(_name, _ext, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ int __traceiter_##_name(void *__data, proto); \ void __probestub_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_FALSE_INIT, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ .probestub = &__probestub_##_name, \ .funcs = NULL, \ .ext = _ext, \ }; \ __TRACEPOINT_ENTRY(_name); \ int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ it_func = READ_ONCE((it_func_ptr)->func); \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ } \ return 0; \ } \ void __probestub_##_name(void *__data, proto) \ { \ } \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) #define DEFINE_TRACE_FN(_name, _reg, _unreg, _proto, _args) \ static struct tracepoint_ext __tracepoint_ext_##_name = { \ .regfunc = _reg, \ .unregfunc = _unreg, \ .faultable = false, \ }; \ __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); #define DEFINE_TRACE_SYSCALL(_name, _reg, _unreg, _proto, _args) \ static struct tracepoint_ext __tracepoint_ext_##_name = { \ .regfunc = _reg, \ .unregfunc = _unreg, \ .faultable = true, \ }; \ __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); #define DEFINE_TRACE(_name, _proto, _args) \ __DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ EXPORT_SYMBOL_GPL(__traceiter_##name); \ EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name); \ EXPORT_SYMBOL(__traceiter_##name); \ EXPORT_STATIC_CALL(tp_func_##name) #else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ static inline void trace_##name(proto) \ { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ { \ return -ENOSYS; \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), \ void *data) \ { \ return -ENOSYS; \ } \ static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } \ static inline bool \ trace_##name##_enabled(void) \ { \ return false; \ } #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) #define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) #endif /* TRACEPOINTS_ENABLED */ #ifdef CONFIG_TRACING /** * tracepoint_string - register constant persistent string to trace system * @str - a constant persistent string that will be referenced in tracepoints * * If constant strings are being 
used in tracepoints, it is faster and * more efficient to just save the pointer to the string and reference * that with a printf "%s" instead of saving the string in the ring buffer * and wasting space and time. * * The problem with the above approach is that userspace tools that read * the binary output of the trace buffers do not have access to the string. * Instead they just show the address of the string which is not very * useful to users. * * With tracepoint_string(), the string will be registered to the tracing * system and exported to userspace via the debugfs/tracing/printk_formats * file that maps the string address to the string text. This way userspace * tools that read the binary buffers have a way to map the pointers to * the ASCII strings they represent. * * The @str used must be a constant string and persistent as it would not * make sense to show a string that no longer exists. But it is still fine * to be used with modules, because when modules are unloaded, if they * had tracepoints, the ring buffers are cleared too. As long as the string * does not change during the life of the module, it is fine to use * tracepoint_string() within a module. */ #define tracepoint_string(str) \ ({ \ static const char *___tp_str __tracepoint_string = str; \ ___tp_str; \ }) #define __tracepoint_string __used __section("__tracepoint_str") #else /* * tracepoint_string() is used to save the string address for userspace * tracing tools. When tracing isn't configured, there's no need to save * anything. */ # define tracepoint_string(str) str # define __tracepoint_string #endif #define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto)) #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * * We define a tracepoint, its arguments, its printk format * and its 'fast binary record' layout. * * Firstly, name your tracepoint via TRACE_EVENT(name : the * 'subsystem_event' notation is fine. * * Think about this whole construct as the * 'trace_sched_switch() function' from now on. * * * TRACE_EVENT(sched_switch, * * * * * A function has a regular function arguments * * prototype, declare it via TP_PROTO(): * * * * TP_PROTO(struct rq *rq, struct task_struct *prev, * struct task_struct *next), * * * * * Define the call signature of the 'function'. * * (Design sidenote: we use this instead of a * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) * * * * TP_ARGS(rq, prev, next), * * * * * Fast binary tracing: define the trace record via * * TP_STRUCT__entry(). You can think about it like a * * regular C structure local variable definition. * * * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in * * /sys/kernel/tracing/events/<*>/format. 
* * * * The declared 'local variable' is called '__entry' * * * * __field(pid_t, prev_pid) is equivalent to a standard declaration: * * * * pid_t prev_pid; * * * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: * * * * char prev_comm[TASK_COMM_LEN]; * * * * TP_STRUCT__entry( * __array( char, prev_comm, TASK_COMM_LEN ) * __field( pid_t, prev_pid ) * __field( int, prev_prio ) * __array( char, next_comm, TASK_COMM_LEN ) * __field( pid_t, next_pid ) * __field( int, next_prio ) * ), * * * * * Assign the entry into the trace record, by embedding * * a full C statement block into TP_fast_assign(). You * * can refer to the trace record as '__entry' - * * otherwise you can put arbitrary C code in here. * * * * Note: this C code will execute every time a trace event * * happens, on an active tracepoint. * * * * TP_fast_assign( * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); * __entry->prev_pid = prev->pid; * __entry->prev_prio = prev->prio; * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); * __entry->next_pid = next->pid; * __entry->next_prio = next->prio; * ), * * * * * Formatted output of a trace record via TP_printk(). * * This is how the tracepoint will appear under ftrace * * plugins that make use of this tracepoint. * * * * (raw-binary tracing wont actually perform this step.) * * * * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, * __entry->next_comm, __entry->next_pid, __entry->next_prio), * * ); * * This macro construct is thus used for the regular printk format * tracing setup, it is used to construct a function pointer based * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/tracing/events/. * * A set of (un)registration functions can be passed to the variant * TRACE_EVENT_FN to perform any (un)registration work. */ #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_CONDITION(template, name, proto, \ args, cond) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_CONDITION(name, proto, args, cond, \ struct, assign, print) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ print, reg, unreg) \ DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) 
#define DECLARE_EVENT_NOP(name, proto, args) \ static inline void trace_##name(proto) \ { } \ static inline bool trace_##name##_enabled(void) \ { \ return false; \ } #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) \ DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT_NOP(template, name, proto, args) \ DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) #endif /* ifdef TRACE_EVENT (see note above) */
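As a usage sketch (the TRACE_SYSTEM "subsys" and the event "subsys_sample" below are illustrative names, not an existing event header): a typical event header builds on TRACE_EVENT() exactly as the long comment above describes, is instantiated once in a .c file via CREATE_TRACE_POINTS, and is then called through the generated trace_subsys_sample() wrapper, optionally guarded by trace_subsys_sample_enabled().

/*
 * Minimal sketch of a trace event header (hypothetical names, assumed to live
 * under include/trace/events/ so the default TRACE_INCLUDE_PATH applies).
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM subsys

#if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SUBSYS_H

#include <linux/tracepoint.h>

TRACE_EVENT(subsys_sample,
	TP_PROTO(int value),
	TP_ARGS(value),
	TP_STRUCT__entry(
		__field(int, value)
	),
	TP_fast_assign(
		__entry->value = value;
	),
	TP_printk("value=%d", __entry->value)
);

#endif /* _TRACE_SUBSYS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>

/*
 * In exactly one .c file, create the tracepoint instances and call them:
 *
 *	#define CREATE_TRACE_POINTS
 *	#include <trace/events/subsys.h>
 *
 *	trace_subsys_sample(42);
 *
 * trace_subsys_sample() already contains the static-branch check; the
 * trace_subsys_sample_enabled() helper is useful to skip computing expensive
 * arguments when the event is disabled.
 */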
// SPDX-License-Identifier: GPL-2.0 /* Parts of this driver are based on the following: * - Kvaser linux mhydra driver (version 5.24) * - CAN driver for esd CAN-USB/2 * * Copyright (C) 2018 KVASER AB, Sweden. All rights reserved. * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh * * Known issues: * - Transition from CAN_STATE_ERROR_WARNING to CAN_STATE_ERROR_ACTIVE is only * reported after a call to do_get_berr_counter(), since firmware does not * distinguish between ERROR_WARNING and ERROR_ACTIVE.
*/ #include <linux/completion.h> #include <linux/device.h> #include <linux/gfp.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/types.h> #include <linux/units.h> #include <linux/usb.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/netlink.h> #include "kvaser_usb.h" /* Forward declarations */ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt; #define KVASER_USB_HYDRA_BULK_EP_IN_ADDR 0x82 #define KVASER_USB_HYDRA_BULK_EP_OUT_ADDR 0x02 #define KVASER_USB_HYDRA_MAX_TRANSID 0xff #define KVASER_USB_HYDRA_MIN_TRANSID 0x01 /* Minihydra command IDs */ #define CMD_SET_BUSPARAMS_REQ 16 #define CMD_GET_BUSPARAMS_REQ 17 #define CMD_GET_BUSPARAMS_RESP 18 #define CMD_GET_CHIP_STATE_REQ 19 #define CMD_CHIP_STATE_EVENT 20 #define CMD_SET_DRIVERMODE_REQ 21 #define CMD_START_CHIP_REQ 26 #define CMD_START_CHIP_RESP 27 #define CMD_STOP_CHIP_REQ 28 #define CMD_STOP_CHIP_RESP 29 #define CMD_TX_CAN_MESSAGE 33 #define CMD_GET_CARD_INFO_REQ 34 #define CMD_GET_CARD_INFO_RESP 35 #define CMD_GET_SOFTWARE_INFO_REQ 38 #define CMD_GET_SOFTWARE_INFO_RESP 39 #define CMD_ERROR_EVENT 45 #define CMD_FLUSH_QUEUE 48 #define CMD_TX_ACKNOWLEDGE 50 #define CMD_FLUSH_QUEUE_RESP 66 #define CMD_SET_BUSPARAMS_FD_REQ 69 #define CMD_SET_BUSPARAMS_FD_RESP 70 #define CMD_SET_BUSPARAMS_RESP 85 #define CMD_GET_CAPABILITIES_REQ 95 #define CMD_GET_CAPABILITIES_RESP 96 #define CMD_RX_MESSAGE 106 #define CMD_MAP_CHANNEL_REQ 200 #define CMD_MAP_CHANNEL_RESP 201 #define CMD_GET_SOFTWARE_DETAILS_REQ 202 #define CMD_GET_SOFTWARE_DETAILS_RESP 203 #define CMD_EXTENDED 255 /* Minihydra extended command IDs */ #define CMD_TX_CAN_MESSAGE_FD 224 #define CMD_TX_ACKNOWLEDGE_FD 225 #define CMD_RX_MESSAGE_FD 226 /* Hydra commands are handled by different threads in firmware. * The threads are denoted hydra entity (HE). Each HE got a unique 6-bit * address. The address is used in hydra commands to get/set source and * destination HE. There are two predefined HE addresses, the remaining * addresses are different between devices and firmware versions. Hence, we need * to enumerate the addresses (see kvaser_usb_hydra_map_channel()). 
*/ /* Well-known HE addresses */ #define KVASER_USB_HYDRA_HE_ADDRESS_ROUTER 0x00 #define KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL 0x3e #define KVASER_USB_HYDRA_TRANSID_CANHE 0x40 #define KVASER_USB_HYDRA_TRANSID_SYSDBG 0x61 struct kvaser_cmd_map_ch_req { char name[16]; u8 channel; u8 reserved[11]; } __packed; struct kvaser_cmd_map_ch_res { u8 he_addr; u8 channel; u8 reserved[26]; } __packed; struct kvaser_cmd_card_info { __le32 serial_number; __le32 clock_res; __le32 mfg_date; __le32 ean[2]; u8 hw_version; u8 usb_mode; u8 hw_type; u8 reserved0; u8 nchannels; u8 reserved1[3]; } __packed; struct kvaser_cmd_sw_info { u8 reserved0[8]; __le16 max_outstanding_tx; u8 reserved1[18]; } __packed; struct kvaser_cmd_sw_detail_req { u8 use_ext_cmd; u8 reserved[27]; } __packed; /* Software detail flags */ #define KVASER_USB_HYDRA_SW_FLAG_FW_BETA BIT(2) #define KVASER_USB_HYDRA_SW_FLAG_FW_BAD BIT(4) #define KVASER_USB_HYDRA_SW_FLAG_FREQ_80M BIT(5) #define KVASER_USB_HYDRA_SW_FLAG_EXT_CMD BIT(9) #define KVASER_USB_HYDRA_SW_FLAG_CANFD BIT(10) #define KVASER_USB_HYDRA_SW_FLAG_NONISO BIT(11) #define KVASER_USB_HYDRA_SW_FLAG_EXT_CAP BIT(12) #define KVASER_USB_HYDRA_SW_FLAG_CAN_FREQ_80M BIT(13) struct kvaser_cmd_sw_detail_res { __le32 sw_flags; __le32 sw_version; __le32 sw_name; __le32 ean[2]; __le32 max_bitrate; u8 reserved[4]; } __packed; /* Sub commands for cap_req and cap_res */ #define KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE 0x02 #define KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT 0x05 #define KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT 0x06 struct kvaser_cmd_cap_req { __le16 cap_cmd; u8 reserved[26]; } __packed; /* Status codes for cap_res */ #define KVASER_USB_HYDRA_CAP_STAT_OK 0x00 #define KVASER_USB_HYDRA_CAP_STAT_NOT_IMPL 0x01 #define KVASER_USB_HYDRA_CAP_STAT_UNAVAIL 0x02 struct kvaser_cmd_cap_res { __le16 cap_cmd; __le16 status; __le32 mask; __le32 value; u8 reserved[16]; } __packed; /* CMD_ERROR_EVENT error codes */ #define KVASER_USB_HYDRA_ERROR_EVENT_CAN 0x01 #define KVASER_USB_HYDRA_ERROR_EVENT_PARAM 0x09 struct kvaser_cmd_error_event { __le16 timestamp[3]; u8 reserved; u8 error_code; __le16 info1; __le16 info2; } __packed; /* Chip state status flags. Used for chip_state_event and err_frame_data. 
*/ #define KVASER_USB_HYDRA_BUS_ERR_ACT 0x00 #define KVASER_USB_HYDRA_BUS_ERR_PASS BIT(5) #define KVASER_USB_HYDRA_BUS_BUS_OFF BIT(6) struct kvaser_cmd_chip_state_event { __le16 timestamp[3]; u8 tx_err_counter; u8 rx_err_counter; u8 bus_status; u8 reserved[19]; } __packed; /* Busparam modes */ #define KVASER_USB_HYDRA_BUS_MODE_CAN 0x00 #define KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO 0x01 #define KVASER_USB_HYDRA_BUS_MODE_NONISO 0x02 struct kvaser_cmd_set_busparams { struct kvaser_usb_busparams busparams_nominal; u8 reserved0[4]; struct kvaser_usb_busparams busparams_data; u8 canfd_mode; u8 reserved1[7]; } __packed; /* Busparam type */ #define KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN 0x00 #define KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD 0x01 struct kvaser_cmd_get_busparams_req { u8 type; u8 reserved[27]; } __packed; struct kvaser_cmd_get_busparams_res { struct kvaser_usb_busparams busparams; u8 reserved[20]; } __packed; /* Ctrl modes */ #define KVASER_USB_HYDRA_CTRLMODE_NORMAL 0x01 #define KVASER_USB_HYDRA_CTRLMODE_LISTEN 0x02 struct kvaser_cmd_set_ctrlmode { u8 mode; u8 reserved[27]; } __packed; struct kvaser_err_frame_data { u8 bus_status; u8 reserved0; u8 tx_err_counter; u8 rx_err_counter; u8 reserved1[4]; } __packed; struct kvaser_cmd_rx_can { u8 cmd_len; u8 cmd_no; u8 channel; u8 flags; __le16 timestamp[3]; u8 dlc; u8 padding; __le32 id; union { u8 data[8]; struct kvaser_err_frame_data err_frame_data; }; } __packed; /* Extended CAN ID flag. Used in rx_can and tx_can */ #define KVASER_USB_HYDRA_EXTENDED_FRAME_ID BIT(31) struct kvaser_cmd_tx_can { __le32 id; u8 data[8]; u8 dlc; u8 flags; __le16 transid; u8 channel; u8 reserved[11]; } __packed; struct kvaser_cmd_tx_ack { __le32 id; u8 data[8]; u8 dlc; u8 flags; __le16 timestamp[3]; u8 reserved0[8]; } __packed; struct kvaser_cmd_header { u8 cmd_no; /* The destination HE address is stored in 0..5 of he_addr. * The upper part of source HE address is stored in 6..7 of he_addr, and * the lower part is stored in 12..15 of transid. */ u8 he_addr; __le16 transid; } __packed; struct kvaser_cmd { struct kvaser_cmd_header header; union { struct kvaser_cmd_map_ch_req map_ch_req; struct kvaser_cmd_map_ch_res map_ch_res; struct kvaser_cmd_card_info card_info; struct kvaser_cmd_sw_info sw_info; struct kvaser_cmd_sw_detail_req sw_detail_req; struct kvaser_cmd_sw_detail_res sw_detail_res; struct kvaser_cmd_cap_req cap_req; struct kvaser_cmd_cap_res cap_res; struct kvaser_cmd_error_event error_event; struct kvaser_cmd_set_busparams set_busparams_req; struct kvaser_cmd_get_busparams_req get_busparams_req; struct kvaser_cmd_get_busparams_res get_busparams_res; struct kvaser_cmd_chip_state_event chip_state_event; struct kvaser_cmd_set_ctrlmode set_ctrlmode; struct kvaser_cmd_rx_can rx_can; struct kvaser_cmd_tx_can tx_can; struct kvaser_cmd_tx_ack tx_ack; } __packed; } __packed; /* CAN frame flags. Used in rx_can, ext_rx_can, tx_can and ext_tx_can */ #define KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME BIT(0) #define KVASER_USB_HYDRA_CF_FLAG_OVERRUN BIT(1) #define KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME BIT(4) #define KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID BIT(5) #define KVASER_USB_HYDRA_CF_FLAG_TX_ACK BIT(6) /* CAN frame flags. Used in ext_rx_can and ext_tx_can */ #define KVASER_USB_HYDRA_CF_FLAG_OSM_NACK BIT(12) #define KVASER_USB_HYDRA_CF_FLAG_ABL BIT(13) #define KVASER_USB_HYDRA_CF_FLAG_FDF BIT(16) #define KVASER_USB_HYDRA_CF_FLAG_BRS BIT(17) #define KVASER_USB_HYDRA_CF_FLAG_ESI BIT(18) /* KCAN packet header macros. 
Used in ext_rx_can and ext_tx_can */ #define KVASER_USB_KCAN_DATA_DLC_BITS 4 #define KVASER_USB_KCAN_DATA_DLC_SHIFT 8 #define KVASER_USB_KCAN_DATA_DLC_MASK \ GENMASK(KVASER_USB_KCAN_DATA_DLC_BITS - 1 + \ KVASER_USB_KCAN_DATA_DLC_SHIFT, \ KVASER_USB_KCAN_DATA_DLC_SHIFT) #define KVASER_USB_KCAN_DATA_BRS BIT(14) #define KVASER_USB_KCAN_DATA_FDF BIT(15) #define KVASER_USB_KCAN_DATA_OSM BIT(16) #define KVASER_USB_KCAN_DATA_AREQ BIT(31) #define KVASER_USB_KCAN_DATA_SRR BIT(31) #define KVASER_USB_KCAN_DATA_RTR BIT(29) #define KVASER_USB_KCAN_DATA_IDE BIT(30) struct kvaser_cmd_ext_rx_can { __le32 flags; __le32 id; __le32 kcan_id; __le32 kcan_header; __le64 timestamp; union { u8 kcan_payload[64]; struct kvaser_err_frame_data err_frame_data; }; } __packed; struct kvaser_cmd_ext_tx_can { __le32 flags; __le32 id; __le32 kcan_id; __le32 kcan_header; u8 databytes; u8 dlc; u8 reserved[6]; u8 kcan_payload[64]; } __packed; struct kvaser_cmd_ext_tx_ack { __le32 flags; u8 reserved0[4]; __le64 timestamp; u8 reserved1[8]; } __packed; /* struct for extended commands (CMD_EXTENDED) */ struct kvaser_cmd_ext { struct kvaser_cmd_header header; __le16 len; u8 cmd_no_ext; u8 reserved; union { struct kvaser_cmd_ext_rx_can rx_can; struct kvaser_cmd_ext_tx_can tx_can; struct kvaser_cmd_ext_tx_ack tx_ack; } __packed; } __packed; struct kvaser_usb_net_hydra_priv { int pending_get_busparams_type; }; static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .name = "kvaser_usb_kcan", .tseg1_min = 1, .tseg1_max = 255, .tseg2_min = 1, .tseg2_max = 32, .sjw_max = 16, .brp_min = 1, .brp_max = 8192, .brp_inc = 1, }; const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = { .name = "kvaser_usb_flex", .tseg1_min = 4, .tseg1_max = 16, .tseg2_min = 2, .tseg2_max = 8, .sjw_max = 4, .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; static const struct can_bittiming_const kvaser_usb_hydra_rt_bittiming_c = { .name = "kvaser_usb_rt", .tseg1_min = 2, .tseg1_max = 96, .tseg2_min = 2, .tseg2_max = 32, .sjw_max = 32, .brp_min = 1, .brp_max = 1024, .brp_inc = 1, }; static const struct can_bittiming_const kvaser_usb_hydra_rtd_bittiming_c = { .name = "kvaser_usb_rt", .tseg1_min = 2, .tseg1_max = 39, .tseg2_min = 2, .tseg2_max = 8, .sjw_max = 8, .brp_min = 1, .brp_max = 1024, .brp_inc = 1, }; #define KVASER_USB_HYDRA_TRANSID_BITS 12 #define KVASER_USB_HYDRA_TRANSID_MASK \ GENMASK(KVASER_USB_HYDRA_TRANSID_BITS - 1, 0) #define KVASER_USB_HYDRA_HE_ADDR_SRC_MASK GENMASK(7, 6) #define KVASER_USB_HYDRA_HE_ADDR_DEST_MASK GENMASK(5, 0) #define KVASER_USB_HYDRA_HE_ADDR_SRC_BITS 2 static inline u16 kvaser_usb_hydra_get_cmd_transid(const struct kvaser_cmd *cmd) { return le16_to_cpu(cmd->header.transid) & KVASER_USB_HYDRA_TRANSID_MASK; } static inline void kvaser_usb_hydra_set_cmd_transid(struct kvaser_cmd *cmd, u16 transid) { cmd->header.transid = cpu_to_le16(transid & KVASER_USB_HYDRA_TRANSID_MASK); } static inline u8 kvaser_usb_hydra_get_cmd_src_he(const struct kvaser_cmd *cmd) { return (cmd->header.he_addr & KVASER_USB_HYDRA_HE_ADDR_SRC_MASK) >> KVASER_USB_HYDRA_HE_ADDR_SRC_BITS | le16_to_cpu(cmd->header.transid) >> KVASER_USB_HYDRA_TRANSID_BITS; } static inline void kvaser_usb_hydra_set_cmd_dest_he(struct kvaser_cmd *cmd, u8 dest_he) { cmd->header.he_addr = (cmd->header.he_addr & KVASER_USB_HYDRA_HE_ADDR_SRC_MASK) | (dest_he & KVASER_USB_HYDRA_HE_ADDR_DEST_MASK); } static u8 kvaser_usb_hydra_channel_from_cmd(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { int i; u8 channel = 0xff; u8 src_he = 
kvaser_usb_hydra_get_cmd_src_he(cmd); for (i = 0; i < KVASER_USB_MAX_NET_DEVICES; i++) { if (dev->card_data.hydra.channel_to_he[i] == src_he) { channel = i; break; } } return channel; } static u16 kvaser_usb_hydra_get_next_transid(struct kvaser_usb *dev) { unsigned long flags; u16 transid; struct kvaser_usb_dev_card_data_hydra *card_data = &dev->card_data.hydra; spin_lock_irqsave(&card_data->transid_lock, flags); transid = card_data->transid; if (transid >= KVASER_USB_HYDRA_MAX_TRANSID) transid = KVASER_USB_HYDRA_MIN_TRANSID; else transid++; card_data->transid = transid; spin_unlock_irqrestore(&card_data->transid_lock, flags); return transid; } static size_t kvaser_usb_hydra_cmd_size(struct kvaser_cmd *cmd) { size_t ret; if (cmd->header.cmd_no == CMD_EXTENDED) ret = le16_to_cpu(((struct kvaser_cmd_ext *)cmd)->len); else ret = sizeof(struct kvaser_cmd); return ret; } static struct kvaser_usb_net_priv * kvaser_usb_hydra_net_priv_from_cmd(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv = NULL; u8 channel = kvaser_usb_hydra_channel_from_cmd(dev, cmd); if (channel >= dev->nchannels) dev_err(&dev->intf->dev, "Invalid channel number (%d)\n", channel); else priv = dev->nets[channel]; return priv; } static ktime_t kvaser_usb_hydra_ktime_from_cmd(const struct kvaser_usb_dev_cfg *cfg, const struct kvaser_cmd *cmd) { ktime_t hwtstamp = 0; if (cmd->header.cmd_no == CMD_EXTENDED) { struct kvaser_cmd_ext *cmd_ext = (struct kvaser_cmd_ext *)cmd; if (cmd_ext->cmd_no_ext == CMD_RX_MESSAGE_FD) hwtstamp = kvaser_usb_timestamp64_to_ktime(cfg, cmd_ext->rx_can.timestamp); else if (cmd_ext->cmd_no_ext == CMD_TX_ACKNOWLEDGE_FD) hwtstamp = kvaser_usb_timestamp64_to_ktime(cfg, cmd_ext->tx_ack.timestamp); } else if (cmd->header.cmd_no == CMD_RX_MESSAGE) { hwtstamp = kvaser_usb_timestamp48_to_ktime(cfg, cmd->rx_can.timestamp); } else if (cmd->header.cmd_no == CMD_TX_ACKNOWLEDGE) { hwtstamp = kvaser_usb_timestamp48_to_ktime(cfg, cmd->tx_ack.timestamp); } return hwtstamp; } static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev, u8 cmd_no, int channel) { struct kvaser_cmd *cmd; size_t cmd_len; int err; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = cmd_no; cmd_len = kvaser_usb_hydra_cmd_size(cmd); if (channel < 0) { kvaser_usb_hydra_set_cmd_dest_he (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL); } else { if (channel >= KVASER_USB_MAX_NET_DEVICES) { dev_err(&dev->intf->dev, "channel (%d) out of range.\n", channel); err = -EINVAL; goto end; } kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[channel]); } kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); if (err) goto end; end: kfree(cmd); return err; } static int kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv, u8 cmd_no) { struct kvaser_cmd *cmd; struct kvaser_usb *dev = priv->dev; size_t cmd_len; int err; cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); if (!cmd) return -ENOMEM; cmd->header.cmd_no = cmd_no; cmd_len = kvaser_usb_hydra_cmd_size(cmd); kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd_async(priv, cmd, cmd_len); if (err) kfree(cmd); return err; } /* This function is used for synchronously waiting on hydra control commands. 
* Note: Compared to kvaser_usb_hydra_read_bulk_callback(), we never need to * handle partial hydra commands. Since hydra control commands are always * non-extended commands. */ static int kvaser_usb_hydra_wait_cmd(const struct kvaser_usb *dev, u8 cmd_no, struct kvaser_cmd *cmd) { void *buf; int err; unsigned long timeout = jiffies + msecs_to_jiffies(KVASER_USB_TIMEOUT); if (cmd->header.cmd_no == CMD_EXTENDED) { dev_err(&dev->intf->dev, "Wait for CMD_EXTENDED not allowed\n"); return -EINVAL; } buf = kzalloc(KVASER_USB_RX_BUFFER_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; do { int actual_len = 0; int pos = 0; err = kvaser_usb_recv_cmd(dev, buf, KVASER_USB_RX_BUFFER_SIZE, &actual_len); if (err < 0) goto end; while (pos < actual_len) { struct kvaser_cmd *tmp_cmd; size_t cmd_len; tmp_cmd = buf + pos; cmd_len = kvaser_usb_hydra_cmd_size(tmp_cmd); if (pos + cmd_len > actual_len) { dev_err_ratelimited(&dev->intf->dev, "Format error\n"); break; } if (tmp_cmd->header.cmd_no == cmd_no) { memcpy(cmd, tmp_cmd, cmd_len); goto end; } pos += cmd_len; } } while (time_before(jiffies, timeout)); err = -EINVAL; end: kfree(buf); return err; } static int kvaser_usb_hydra_map_channel_resp(struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { u8 he, channel; u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd); struct kvaser_usb_dev_card_data_hydra *card_data = &dev->card_data.hydra; if (transid > 0x007f || transid < 0x0040) { dev_err(&dev->intf->dev, "CMD_MAP_CHANNEL_RESP, invalid transid: 0x%x\n", transid); return -EINVAL; } switch (transid) { case KVASER_USB_HYDRA_TRANSID_CANHE: case KVASER_USB_HYDRA_TRANSID_CANHE + 1: case KVASER_USB_HYDRA_TRANSID_CANHE + 2: case KVASER_USB_HYDRA_TRANSID_CANHE + 3: case KVASER_USB_HYDRA_TRANSID_CANHE + 4: channel = transid & 0x000f; he = cmd->map_ch_res.he_addr; card_data->channel_to_he[channel] = he; break; case KVASER_USB_HYDRA_TRANSID_SYSDBG: card_data->sysdbg_he = cmd->map_ch_res.he_addr; break; default: dev_warn(&dev->intf->dev, "Unknown CMD_MAP_CHANNEL_RESP transid=0x%x\n", transid); break; } return 0; } static int kvaser_usb_hydra_map_channel(struct kvaser_usb *dev, u16 transid, u8 channel, const char *name) { struct kvaser_cmd *cmd; int err; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; strcpy(cmd->map_ch_req.name, name); cmd->header.cmd_no = CMD_MAP_CHANNEL_REQ; kvaser_usb_hydra_set_cmd_dest_he (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ROUTER); cmd->map_ch_req.channel = channel; kvaser_usb_hydra_set_cmd_transid(cmd, transid); err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); if (err) goto end; err = kvaser_usb_hydra_wait_cmd(dev, CMD_MAP_CHANNEL_RESP, cmd); if (err) goto end; err = kvaser_usb_hydra_map_channel_resp(dev, cmd); if (err) goto end; end: kfree(cmd); return err; } static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev, u16 cap_cmd_req, u16 *status) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; struct kvaser_cmd *cmd; size_t cmd_len; u32 value = 0; u32 mask = 0; u16 cap_cmd_res; int err; int i; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_GET_CAPABILITIES_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); cmd->cap_req.cap_cmd = cpu_to_le16(cap_cmd_req); kvaser_usb_hydra_set_cmd_dest_he(cmd, card_data->hydra.sysdbg_he); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); if (err) goto end; err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_CAPABILITIES_RESP, cmd); if (err) goto 
end; *status = le16_to_cpu(cmd->cap_res.status); if (*status != KVASER_USB_HYDRA_CAP_STAT_OK) goto end; cap_cmd_res = le16_to_cpu(cmd->cap_res.cap_cmd); switch (cap_cmd_res) { case KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE: case KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT: case KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT: value = le32_to_cpu(cmd->cap_res.value); mask = le32_to_cpu(cmd->cap_res.mask); break; default: dev_warn(&dev->intf->dev, "Unknown capability command %u\n", cap_cmd_res); break; } for (i = 0; i < dev->nchannels; i++) { if (BIT(i) & (value & mask)) { switch (cap_cmd_res) { case KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE: card_data->ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; break; case KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT: card_data->capabilities |= KVASER_USB_CAP_BERR_CAP; break; case KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT: card_data->ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; break; } } } end: kfree(cmd); return err; } static void kvaser_usb_hydra_start_chip_reply(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; if (completion_done(&priv->start_comp) && netif_queue_stopped(priv->netdev)) { netif_wake_queue(priv->netdev); } else { netif_start_queue(priv->netdev); complete(&priv->start_comp); } } static void kvaser_usb_hydra_stop_chip_reply(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; complete(&priv->stop_comp); } static void kvaser_usb_hydra_flush_queue_reply(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; complete(&priv->flush_comp); } static void kvaser_usb_hydra_get_busparams_reply(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv; struct kvaser_usb_net_hydra_priv *hydra; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; hydra = priv->sub_priv; if (!hydra) return; switch (hydra->pending_get_busparams_type) { case KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN: memcpy(&priv->busparams_nominal, &cmd->get_busparams_res.busparams, sizeof(priv->busparams_nominal)); break; case KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD: memcpy(&priv->busparams_data, &cmd->get_busparams_res.busparams, sizeof(priv->busparams_nominal)); break; default: dev_warn(&dev->intf->dev, "Unknown get_busparams_type %d\n", hydra->pending_get_busparams_type); break; } hydra->pending_get_busparams_type = -1; complete(&priv->get_busparams_comp); } static void kvaser_usb_hydra_bus_status_to_can_state(const struct kvaser_usb_net_priv *priv, u8 bus_status, const struct can_berr_counter *bec, enum can_state *new_state) { if (bus_status & KVASER_USB_HYDRA_BUS_BUS_OFF) { *new_state = CAN_STATE_BUS_OFF; } else if (bus_status & KVASER_USB_HYDRA_BUS_ERR_PASS) { *new_state = CAN_STATE_ERROR_PASSIVE; } else if (bus_status == KVASER_USB_HYDRA_BUS_ERR_ACT) { if (bec->txerr >= 128 || bec->rxerr >= 128) { netdev_warn(priv->netdev, "ERR_ACTIVE but err tx=%u or rx=%u >=128\n", bec->txerr, bec->rxerr); *new_state = CAN_STATE_ERROR_PASSIVE; } else if (bec->txerr >= 96 || bec->rxerr >= 96) { *new_state = CAN_STATE_ERROR_WARNING; } else { *new_state = CAN_STATE_ERROR_ACTIVE; } } } static void kvaser_usb_hydra_change_state(struct kvaser_usb_net_priv *priv, const struct can_berr_counter *bec, struct can_frame *cf, enum can_state new_state) { struct 
net_device *netdev = priv->netdev; enum can_state old_state = priv->can.state; enum can_state tx_state, rx_state; tx_state = (bec->txerr >= bec->rxerr) ? new_state : CAN_STATE_ERROR_ACTIVE; rx_state = (bec->txerr <= bec->rxerr) ? new_state : CAN_STATE_ERROR_ACTIVE; can_change_state(netdev, cf, tx_state, rx_state); if (new_state == CAN_STATE_BUS_OFF && old_state < CAN_STATE_BUS_OFF) { if (priv->can.restart_ms == 0) kvaser_usb_hydra_send_simple_cmd_async(priv, CMD_STOP_CHIP_REQ); can_bus_off(netdev); } if (priv->can.restart_ms && old_state >= CAN_STATE_BUS_OFF && new_state < CAN_STATE_BUS_OFF) { priv->can.can_stats.restarts++; if (cf) cf->can_id |= CAN_ERR_RESTARTED; } if (cf && new_state != CAN_STATE_BUS_OFF) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = bec->txerr; cf->data[7] = bec->rxerr; } } static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv, u8 bus_status, const struct can_berr_counter *bec) { struct net_device *netdev = priv->netdev; struct can_frame *cf; struct sk_buff *skb; enum can_state new_state, old_state; old_state = priv->can.state; kvaser_usb_hydra_bus_status_to_can_state(priv, bus_status, bec, &new_state); if (new_state == old_state) return; /* Ignore state change if previous state was STOPPED and the new state * is BUS_OFF. Firmware always report this as BUS_OFF, since firmware * does not distinguish between BUS_OFF and STOPPED. */ if (old_state == CAN_STATE_STOPPED && new_state == CAN_STATE_BUS_OFF) return; skb = alloc_can_err_skb(netdev, &cf); kvaser_usb_hydra_change_state(priv, bec, cf, new_state); if (skb) netif_rx(skb); else netdev_warn(netdev, "No memory left for err_skb\n"); } static void kvaser_usb_hydra_state_event(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv; struct can_berr_counter bec; u8 bus_status; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; bus_status = cmd->chip_state_event.bus_status; bec.txerr = cmd->chip_state_event.tx_err_counter; bec.rxerr = cmd->chip_state_event.rx_err_counter; kvaser_usb_hydra_update_state(priv, bus_status, &bec); priv->bec.txerr = bec.txerr; priv->bec.rxerr = bec.rxerr; } static void kvaser_usb_hydra_error_event_parameter(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { /* info1 will contain the offending cmd_no */ switch (le16_to_cpu(cmd->error_event.info1)) { case CMD_START_CHIP_REQ: dev_warn(&dev->intf->dev, "CMD_START_CHIP_REQ error in parameter\n"); break; case CMD_STOP_CHIP_REQ: dev_warn(&dev->intf->dev, "CMD_STOP_CHIP_REQ error in parameter\n"); break; case CMD_FLUSH_QUEUE: dev_warn(&dev->intf->dev, "CMD_FLUSH_QUEUE error in parameter\n"); break; case CMD_SET_BUSPARAMS_REQ: dev_warn(&dev->intf->dev, "Set bittiming failed. Error in parameter\n"); break; case CMD_SET_BUSPARAMS_FD_REQ: dev_warn(&dev->intf->dev, "Set data bittiming failed. Error in parameter\n"); break; default: dev_warn(&dev->intf->dev, "Unhandled parameter error event cmd_no (%u)\n", le16_to_cpu(cmd->error_event.info1)); break; } } static void kvaser_usb_hydra_error_event(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { switch (cmd->error_event.error_code) { case KVASER_USB_HYDRA_ERROR_EVENT_PARAM: kvaser_usb_hydra_error_event_parameter(dev, cmd); break; case KVASER_USB_HYDRA_ERROR_EVENT_CAN: /* Wrong channel mapping?! This should never happen! 
* info1 will contain the offending cmd_no */ dev_err(&dev->intf->dev, "Received CAN error event for cmd_no (%u)\n", le16_to_cpu(cmd->error_event.info1)); break; default: dev_warn(&dev->intf->dev, "Unhandled error event (%d)\n", cmd->error_event.error_code); break; } } static void kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, const struct kvaser_err_frame_data *err_frame_data, ktime_t hwtstamp) { struct net_device *netdev = priv->netdev; struct net_device_stats *stats = &netdev->stats; struct can_frame *cf = NULL; struct sk_buff *skb = NULL; struct can_berr_counter bec; enum can_state new_state, old_state; u8 bus_status; priv->can.can_stats.bus_error++; stats->rx_errors++; bus_status = err_frame_data->bus_status; bec.txerr = err_frame_data->tx_err_counter; bec.rxerr = err_frame_data->rx_err_counter; old_state = priv->can.state; kvaser_usb_hydra_bus_status_to_can_state(priv, bus_status, &bec, &new_state); if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) skb = alloc_can_err_skb(netdev, &cf); if (new_state != old_state) kvaser_usb_hydra_change_state(priv, &bec, cf, new_state); if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) { if (skb) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); shhwtstamps->hwtstamp = hwtstamp; cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_CNT; cf->data[6] = bec.txerr; cf->data[7] = bec.rxerr; netif_rx(skb); } else { stats->rx_dropped++; netdev_warn(netdev, "No memory left for err_skb\n"); } } priv->bec.txerr = bec.txerr; priv->bec.rxerr = bec.rxerr; } static void kvaser_usb_hydra_one_shot_fail(struct kvaser_usb_net_priv *priv, const struct kvaser_cmd_ext *cmd) { struct net_device *netdev = priv->netdev; struct net_device_stats *stats = &netdev->stats; struct can_frame *cf; struct sk_buff *skb; u32 flags; skb = alloc_can_err_skb(netdev, &cf); if (!skb) { stats->rx_dropped++; netdev_warn(netdev, "No memory left for err_skb\n"); return; } cf->can_id |= CAN_ERR_BUSERROR; flags = le32_to_cpu(cmd->tx_ack.flags); if (flags & KVASER_USB_HYDRA_CF_FLAG_OSM_NACK) cf->can_id |= CAN_ERR_ACK; if (flags & KVASER_USB_HYDRA_CF_FLAG_ABL) { cf->can_id |= CAN_ERR_LOSTARB; priv->can.can_stats.arbitration_lost++; } stats->tx_errors++; netif_rx(skb); } static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_tx_urb_context *context; struct kvaser_usb_net_priv *priv; unsigned long irq_flags; unsigned int len; bool one_shot_fail = false; bool is_err_frame = false; u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd); struct sk_buff *skb; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; if (!netif_device_present(priv->netdev)) return; if (cmd->header.cmd_no == CMD_EXTENDED) { struct kvaser_cmd_ext *cmd_ext = (struct kvaser_cmd_ext *)cmd; u32 flags = le32_to_cpu(cmd_ext->tx_ack.flags); if (flags & (KVASER_USB_HYDRA_CF_FLAG_OSM_NACK | KVASER_USB_HYDRA_CF_FLAG_ABL)) { kvaser_usb_hydra_one_shot_fail(priv, cmd_ext); one_shot_fail = true; } is_err_frame = flags & KVASER_USB_HYDRA_CF_FLAG_TX_ACK && flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME; } context = &priv->tx_contexts[transid % dev->max_tx_urbs]; spin_lock_irqsave(&priv->tx_contexts_lock, irq_flags); skb = priv->can.echo_skb[context->echo_index]; if (skb) skb_hwtstamps(skb)->hwtstamp = kvaser_usb_hydra_ktime_from_cmd(dev->cfg, cmd); len = can_get_echo_skb(priv->netdev, context->echo_index, NULL); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(priv->netdev); 
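	/* The echo skb has been handed back and its TX context marked free
	 * above, so the netdev queue is woken again while tx_contexts_lock
	 * is still held.
	 */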
spin_unlock_irqrestore(&priv->tx_contexts_lock, irq_flags); if (!one_shot_fail && !is_err_frame) { struct net_device_stats *stats = &priv->netdev->stats; stats->tx_packets++; stats->tx_bytes += len; } } static void kvaser_usb_hydra_rx_msg_std(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { struct kvaser_usb_net_priv *priv = NULL; struct can_frame *cf; struct sk_buff *skb; struct skb_shared_hwtstamps *shhwtstamps; struct net_device_stats *stats; u8 flags; ktime_t hwtstamp; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); if (!priv) return; stats = &priv->netdev->stats; flags = cmd->rx_can.flags; hwtstamp = kvaser_usb_hydra_ktime_from_cmd(dev->cfg, cmd); if (flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME) { kvaser_usb_hydra_error_frame(priv, &cmd->rx_can.err_frame_data, hwtstamp); return; } skb = alloc_can_skb(priv->netdev, &cf); if (!skb) { stats->rx_dropped++; return; } shhwtstamps = skb_hwtstamps(skb); shhwtstamps->hwtstamp = hwtstamp; cf->can_id = le32_to_cpu(cmd->rx_can.id); if (cf->can_id & KVASER_USB_HYDRA_EXTENDED_FRAME_ID) { cf->can_id &= CAN_EFF_MASK; cf->can_id |= CAN_EFF_FLAG; } else { cf->can_id &= CAN_SFF_MASK; } if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN) kvaser_usb_can_rx_over_error(priv->netdev); can_frame_set_cc_len((struct can_frame *)cf, cmd->rx_can.dlc, priv->can.ctrlmode); if (flags & KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME) { cf->can_id |= CAN_RTR_FLAG; } else { memcpy(cf->data, cmd->rx_can.data, cf->len); stats->rx_bytes += cf->len; } stats->rx_packets++; netif_rx(skb); } static void kvaser_usb_hydra_rx_msg_ext(const struct kvaser_usb *dev, const struct kvaser_cmd_ext *cmd) { struct kvaser_cmd *std_cmd = (struct kvaser_cmd *)cmd; struct kvaser_usb_net_priv *priv; struct canfd_frame *cf; struct sk_buff *skb; struct skb_shared_hwtstamps *shhwtstamps; struct net_device_stats *stats; u32 flags; u8 dlc; u32 kcan_header; ktime_t hwtstamp; priv = kvaser_usb_hydra_net_priv_from_cmd(dev, std_cmd); if (!priv) return; stats = &priv->netdev->stats; kcan_header = le32_to_cpu(cmd->rx_can.kcan_header); dlc = (kcan_header & KVASER_USB_KCAN_DATA_DLC_MASK) >> KVASER_USB_KCAN_DATA_DLC_SHIFT; flags = le32_to_cpu(cmd->rx_can.flags); hwtstamp = kvaser_usb_hydra_ktime_from_cmd(dev->cfg, std_cmd); if (flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME) { kvaser_usb_hydra_error_frame(priv, &cmd->rx_can.err_frame_data, hwtstamp); return; } if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF) skb = alloc_canfd_skb(priv->netdev, &cf); else skb = alloc_can_skb(priv->netdev, (struct can_frame **)&cf); if (!skb) { stats->rx_dropped++; return; } shhwtstamps = skb_hwtstamps(skb); shhwtstamps->hwtstamp = hwtstamp; cf->can_id = le32_to_cpu(cmd->rx_can.id); if (flags & KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID) { cf->can_id &= CAN_EFF_MASK; cf->can_id |= CAN_EFF_FLAG; } else { cf->can_id &= CAN_SFF_MASK; } if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN) kvaser_usb_can_rx_over_error(priv->netdev); if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF) { cf->len = can_fd_dlc2len(dlc); if (flags & KVASER_USB_HYDRA_CF_FLAG_BRS) cf->flags |= CANFD_BRS; if (flags & KVASER_USB_HYDRA_CF_FLAG_ESI) cf->flags |= CANFD_ESI; } else { can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->can.ctrlmode); } if (flags & KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME) { cf->can_id |= CAN_RTR_FLAG; } else { memcpy(cf->data, cmd->rx_can.kcan_payload, cf->len); stats->rx_bytes += cf->len; } stats->rx_packets++; netif_rx(skb); } static void kvaser_usb_hydra_handle_cmd_std(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { switch 
(cmd->header.cmd_no) { case CMD_START_CHIP_RESP: kvaser_usb_hydra_start_chip_reply(dev, cmd); break; case CMD_STOP_CHIP_RESP: kvaser_usb_hydra_stop_chip_reply(dev, cmd); break; case CMD_FLUSH_QUEUE_RESP: kvaser_usb_hydra_flush_queue_reply(dev, cmd); break; case CMD_CHIP_STATE_EVENT: kvaser_usb_hydra_state_event(dev, cmd); break; case CMD_GET_BUSPARAMS_RESP: kvaser_usb_hydra_get_busparams_reply(dev, cmd); break; case CMD_ERROR_EVENT: kvaser_usb_hydra_error_event(dev, cmd); break; case CMD_TX_ACKNOWLEDGE: kvaser_usb_hydra_tx_acknowledge(dev, cmd); break; case CMD_RX_MESSAGE: kvaser_usb_hydra_rx_msg_std(dev, cmd); break; /* Ignored commands */ case CMD_SET_BUSPARAMS_RESP: case CMD_SET_BUSPARAMS_FD_RESP: break; default: dev_warn(&dev->intf->dev, "Unhandled command (%d)\n", cmd->header.cmd_no); break; } } static void kvaser_usb_hydra_handle_cmd_ext(const struct kvaser_usb *dev, const struct kvaser_cmd_ext *cmd) { switch (cmd->cmd_no_ext) { case CMD_TX_ACKNOWLEDGE_FD: kvaser_usb_hydra_tx_acknowledge(dev, (struct kvaser_cmd *)cmd); break; case CMD_RX_MESSAGE_FD: kvaser_usb_hydra_rx_msg_ext(dev, cmd); break; default: dev_warn(&dev->intf->dev, "Unhandled extended command (%d)\n", cmd->header.cmd_no); break; } } static void kvaser_usb_hydra_handle_cmd(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { if (cmd->header.cmd_no == CMD_EXTENDED) kvaser_usb_hydra_handle_cmd_ext (dev, (struct kvaser_cmd_ext *)cmd); else kvaser_usb_hydra_handle_cmd_std(dev, cmd); } static void * kvaser_usb_hydra_frame_to_cmd_ext(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *cmd_len, u16 transid) { struct kvaser_usb *dev = priv->dev; struct kvaser_cmd_ext *cmd; struct canfd_frame *cf = (struct canfd_frame *)skb->data; u8 dlc; u8 nbr_of_bytes = cf->len; u32 flags; u32 id; u32 kcan_id; u32 kcan_header; cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); if (!cmd) return NULL; kvaser_usb_hydra_set_cmd_dest_he ((struct kvaser_cmd *)cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid((struct kvaser_cmd *)cmd, transid); cmd->header.cmd_no = CMD_EXTENDED; cmd->cmd_no_ext = CMD_TX_CAN_MESSAGE_FD; *cmd_len = ALIGN(sizeof(struct kvaser_cmd_ext) - sizeof(cmd->tx_can.kcan_payload) + nbr_of_bytes, 8); cmd->len = cpu_to_le16(*cmd_len); if (can_is_canfd_skb(skb)) dlc = can_fd_len2dlc(cf->len); else dlc = can_get_cc_dlc((struct can_frame *)cf, priv->can.ctrlmode); cmd->tx_can.databytes = nbr_of_bytes; cmd->tx_can.dlc = dlc; if (cf->can_id & CAN_EFF_FLAG) { id = cf->can_id & CAN_EFF_MASK; flags = KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID; kcan_id = (cf->can_id & CAN_EFF_MASK) | KVASER_USB_KCAN_DATA_IDE | KVASER_USB_KCAN_DATA_SRR; } else { id = cf->can_id & CAN_SFF_MASK; flags = 0; kcan_id = cf->can_id & CAN_SFF_MASK; } if (cf->can_id & CAN_ERR_FLAG) flags |= KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME; kcan_header = ((dlc << KVASER_USB_KCAN_DATA_DLC_SHIFT) & KVASER_USB_KCAN_DATA_DLC_MASK) | KVASER_USB_KCAN_DATA_AREQ | (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT ? KVASER_USB_KCAN_DATA_OSM : 0); if (can_is_canfd_skb(skb)) { kcan_header |= KVASER_USB_KCAN_DATA_FDF | (cf->flags & CANFD_BRS ? 
KVASER_USB_KCAN_DATA_BRS : 0); } else { if (cf->can_id & CAN_RTR_FLAG) { kcan_id |= KVASER_USB_KCAN_DATA_RTR; cmd->tx_can.databytes = 0; flags |= KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME; } } cmd->tx_can.kcan_id = cpu_to_le32(kcan_id); cmd->tx_can.id = cpu_to_le32(id); cmd->tx_can.flags = cpu_to_le32(flags); cmd->tx_can.kcan_header = cpu_to_le32(kcan_header); memcpy(cmd->tx_can.kcan_payload, cf->data, nbr_of_bytes); return cmd; } static void * kvaser_usb_hydra_frame_to_cmd_std(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *cmd_len, u16 transid) { struct kvaser_usb *dev = priv->dev; struct kvaser_cmd *cmd; struct can_frame *cf = (struct can_frame *)skb->data; u32 flags; u32 id; cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); if (!cmd) return NULL; kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid(cmd, transid); cmd->header.cmd_no = CMD_TX_CAN_MESSAGE; *cmd_len = ALIGN(sizeof(struct kvaser_cmd), 8); if (cf->can_id & CAN_EFF_FLAG) { id = (cf->can_id & CAN_EFF_MASK); id |= KVASER_USB_HYDRA_EXTENDED_FRAME_ID; } else { id = cf->can_id & CAN_SFF_MASK; } cmd->tx_can.dlc = can_get_cc_dlc(cf, priv->can.ctrlmode); flags = (cf->can_id & CAN_EFF_FLAG ? KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID : 0); if (cf->can_id & CAN_RTR_FLAG) flags |= KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME; flags |= (cf->can_id & CAN_ERR_FLAG ? KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME : 0); cmd->tx_can.id = cpu_to_le32(id); cmd->tx_can.flags = flags; memcpy(cmd->tx_can.data, cf->data, cf->len); return cmd; } static int kvaser_usb_hydra_set_mode(struct net_device *netdev, enum can_mode mode) { int err = 0; switch (mode) { case CAN_MODE_START: /* CAN controller automatically recovers from BUS_OFF */ break; default: err = -EOPNOTSUPP; } return err; } static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv, int busparams_type) { struct kvaser_usb *dev = priv->dev; struct kvaser_usb_net_hydra_priv *hydra = priv->sub_priv; struct kvaser_cmd *cmd; size_t cmd_len; int err; if (!hydra) return -EINVAL; cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_GET_BUSPARAMS_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); cmd->get_busparams_req.type = busparams_type; hydra->pending_get_busparams_type = busparams_type; reinit_completion(&priv->get_busparams_comp); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); if (err) return err; if (!wait_for_completion_timeout(&priv->get_busparams_comp, msecs_to_jiffies(KVASER_USB_TIMEOUT))) return -ETIMEDOUT; return err; } static int kvaser_usb_hydra_get_nominal_busparams(struct kvaser_usb_net_priv *priv) { return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN); } static int kvaser_usb_hydra_get_data_busparams(struct kvaser_usb_net_priv *priv) { return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD); } static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev, const struct kvaser_usb_busparams *busparams) { struct kvaser_cmd *cmd; struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; size_t cmd_len; int err; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_SET_BUSPARAMS_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); 
memcpy(&cmd->set_busparams_req.busparams_nominal, busparams, sizeof(cmd->set_busparams_req.busparams_nominal)); kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); kfree(cmd); return err; } static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev, const struct kvaser_usb_busparams *busparams) { struct kvaser_cmd *cmd; struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; size_t cmd_len; int err; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_SET_BUSPARAMS_FD_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); memcpy(&cmd->set_busparams_req.busparams_data, busparams, sizeof(cmd->set_busparams_req.busparams_data)); if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) cmd->set_busparams_req.canfd_mode = KVASER_USB_HYDRA_BUS_MODE_NONISO; else cmd->set_busparams_req.canfd_mode = KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO; } kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); kfree(cmd); return err; } static int kvaser_usb_hydra_get_berr_counter(const struct net_device *netdev, struct can_berr_counter *bec) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); int err; err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_GET_CHIP_STATE_REQ, priv->channel); if (err) return err; *bec = priv->bec; return 0; } static int kvaser_usb_hydra_setup_endpoints(struct kvaser_usb *dev) { const struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *ep; int i; iface_desc = dev->intf->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { ep = &iface_desc->endpoint[i].desc; if (!dev->bulk_in && usb_endpoint_is_bulk_in(ep) && ep->bEndpointAddress == KVASER_USB_HYDRA_BULK_EP_IN_ADDR) dev->bulk_in = ep; if (!dev->bulk_out && usb_endpoint_is_bulk_out(ep) && ep->bEndpointAddress == KVASER_USB_HYDRA_BULK_EP_OUT_ADDR) dev->bulk_out = ep; if (dev->bulk_in && dev->bulk_out) return 0; } return -ENODEV; } static int kvaser_usb_hydra_init_card(struct kvaser_usb *dev) { int err; unsigned int i; struct kvaser_usb_dev_card_data_hydra *card_data = &dev->card_data.hydra; card_data->transid = KVASER_USB_HYDRA_MIN_TRANSID; spin_lock_init(&card_data->transid_lock); memset(card_data->usb_rx_leftover, 0, KVASER_USB_HYDRA_MAX_CMD_LEN); card_data->usb_rx_leftover_len = 0; spin_lock_init(&card_data->usb_rx_leftover_lock); memset(card_data->channel_to_he, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL, sizeof(card_data->channel_to_he)); card_data->sysdbg_he = 0; for (i = 0; i < KVASER_USB_MAX_NET_DEVICES; i++) { err = kvaser_usb_hydra_map_channel (dev, (KVASER_USB_HYDRA_TRANSID_CANHE | i), i, "CAN"); if (err) { dev_err(&dev->intf->dev, "CMD_MAP_CHANNEL_REQ failed for CAN%u\n", i); return err; } } err = kvaser_usb_hydra_map_channel(dev, KVASER_USB_HYDRA_TRANSID_SYSDBG, 0, "SYSDBG"); if (err) { dev_err(&dev->intf->dev, "CMD_MAP_CHANNEL_REQ failed for SYSDBG\n"); return err; } return 0; } static int kvaser_usb_hydra_init_channel(struct kvaser_usb_net_priv *priv) { struct kvaser_usb_net_hydra_priv *hydra; hydra = devm_kzalloc(&priv->dev->intf->dev, sizeof(*hydra), GFP_KERNEL); if (!hydra) return -ENOMEM; priv->sub_priv = hydra; return 0; } static int 
kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev) { struct kvaser_cmd cmd; int err; err = kvaser_usb_hydra_send_simple_cmd(dev, CMD_GET_SOFTWARE_INFO_REQ, -1); if (err) return err; memset(&cmd, 0, sizeof(struct kvaser_cmd)); err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_SOFTWARE_INFO_RESP, &cmd); if (err) return err; dev->max_tx_urbs = min_t(unsigned int, KVASER_USB_MAX_TX_URBS, le16_to_cpu(cmd.sw_info.max_outstanding_tx)); return 0; } static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev) { struct kvaser_cmd *cmd; size_t cmd_len; int err; u32 flags; struct kvaser_usb_dev_card_data *card_data = &dev->card_data; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_GET_SOFTWARE_DETAILS_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); cmd->sw_detail_req.use_ext_cmd = 1; kvaser_usb_hydra_set_cmd_dest_he (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); err = kvaser_usb_send_cmd(dev, cmd, cmd_len); if (err) goto end; err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_SOFTWARE_DETAILS_RESP, cmd); if (err) goto end; dev->fw_version = le32_to_cpu(cmd->sw_detail_res.sw_version); flags = le32_to_cpu(cmd->sw_detail_res.sw_flags); if (flags & KVASER_USB_HYDRA_SW_FLAG_FW_BAD) { dev_err(&dev->intf->dev, "Bad firmware, device refuse to run!\n"); err = -EINVAL; goto end; } if (flags & KVASER_USB_HYDRA_SW_FLAG_FW_BETA) dev_info(&dev->intf->dev, "Beta firmware in use\n"); if (flags & KVASER_USB_HYDRA_SW_FLAG_EXT_CAP) card_data->capabilities |= KVASER_USB_CAP_EXT_CAP; if (flags & KVASER_USB_HYDRA_SW_FLAG_EXT_CMD) card_data->capabilities |= KVASER_USB_HYDRA_CAP_EXT_CMD; if (flags & KVASER_USB_HYDRA_SW_FLAG_CANFD) card_data->ctrlmode_supported |= CAN_CTRLMODE_FD; if (flags & KVASER_USB_HYDRA_SW_FLAG_NONISO) card_data->ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO; if (flags & KVASER_USB_HYDRA_SW_FLAG_FREQ_80M) dev->cfg = &kvaser_usb_hydra_dev_cfg_kcan; else if (flags & KVASER_USB_HYDRA_SW_FLAG_CAN_FREQ_80M) dev->cfg = &kvaser_usb_hydra_dev_cfg_rt; else dev->cfg = &kvaser_usb_hydra_dev_cfg_flexc; end: kfree(cmd); return err; } static int kvaser_usb_hydra_get_card_info(struct kvaser_usb *dev) { struct kvaser_cmd cmd; int err; err = kvaser_usb_hydra_send_simple_cmd(dev, CMD_GET_CARD_INFO_REQ, -1); if (err) return err; memset(&cmd, 0, sizeof(struct kvaser_cmd)); err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_CARD_INFO_RESP, &cmd); if (err) return err; dev->nchannels = cmd.card_info.nchannels; if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES) return -EINVAL; return 0; } static int kvaser_usb_hydra_get_capabilities(struct kvaser_usb *dev) { int err; u16 status; if (!(dev->card_data.capabilities & KVASER_USB_CAP_EXT_CAP)) { dev_info(&dev->intf->dev, "No extended capability support. 
Upgrade your device.\n"); return 0; } err = kvaser_usb_hydra_get_single_capability (dev, KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE, &status); if (err) return err; if (status) dev_info(&dev->intf->dev, "KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE failed %u\n", status); err = kvaser_usb_hydra_get_single_capability (dev, KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT, &status); if (err) return err; if (status) dev_info(&dev->intf->dev, "KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT failed %u\n", status); err = kvaser_usb_hydra_get_single_capability (dev, KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT, &status); if (err) return err; if (status) dev_info(&dev->intf->dev, "KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT failed %u\n", status); return 0; } static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv) { struct kvaser_usb *dev = priv->dev; struct kvaser_cmd *cmd; size_t cmd_len; int err; if ((priv->can.ctrlmode & (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO)) == CAN_CTRLMODE_FD_NON_ISO) { netdev_warn(priv->netdev, "CTRLMODE_FD shall be on if CTRLMODE_FD_NON_ISO is on\n"); return -EINVAL; } cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->header.cmd_no = CMD_SET_DRIVERMODE_REQ; cmd_len = kvaser_usb_hydra_cmd_size(cmd); kvaser_usb_hydra_set_cmd_dest_he (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); kvaser_usb_hydra_set_cmd_transid (cmd, kvaser_usb_hydra_get_next_transid(dev)); if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_LISTEN; else cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_NORMAL; err = kvaser_usb_send_cmd(dev, cmd, cmd_len); kfree(cmd); return err; } static int kvaser_usb_hydra_start_chip(struct kvaser_usb_net_priv *priv) { int err; reinit_completion(&priv->start_comp); err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_START_CHIP_REQ, priv->channel); if (err) return err; if (!wait_for_completion_timeout(&priv->start_comp, msecs_to_jiffies(KVASER_USB_TIMEOUT))) return -ETIMEDOUT; return 0; } static int kvaser_usb_hydra_stop_chip(struct kvaser_usb_net_priv *priv) { int err; reinit_completion(&priv->stop_comp); /* Make sure we do not report invalid BUS_OFF from CMD_CHIP_STATE_EVENT * see comment in kvaser_usb_hydra_update_state() */ priv->can.state = CAN_STATE_STOPPED; err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_STOP_CHIP_REQ, priv->channel); if (err) return err; if (!wait_for_completion_timeout(&priv->stop_comp, msecs_to_jiffies(KVASER_USB_TIMEOUT))) return -ETIMEDOUT; return 0; } static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv) { int err; reinit_completion(&priv->flush_comp); err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE, priv->channel); if (err) return err; if (!wait_for_completion_timeout(&priv->flush_comp, msecs_to_jiffies(KVASER_USB_TIMEOUT))) return -ETIMEDOUT; return 0; } /* A single extended hydra command can be transmitted in multiple transfers * We have to buffer partial hydra commands, and handle them on next callback. 
*/ static void kvaser_usb_hydra_read_bulk_callback(struct kvaser_usb *dev, void *buf, int len) { unsigned long irq_flags; struct kvaser_cmd *cmd; int pos = 0; size_t cmd_len; struct kvaser_usb_dev_card_data_hydra *card_data = &dev->card_data.hydra; int usb_rx_leftover_len; spinlock_t *usb_rx_leftover_lock = &card_data->usb_rx_leftover_lock; spin_lock_irqsave(usb_rx_leftover_lock, irq_flags); usb_rx_leftover_len = card_data->usb_rx_leftover_len; if (usb_rx_leftover_len) { int remaining_bytes; cmd = (struct kvaser_cmd *)card_data->usb_rx_leftover; cmd_len = kvaser_usb_hydra_cmd_size(cmd); remaining_bytes = min_t(unsigned int, len, cmd_len - usb_rx_leftover_len); /* Make sure we do not overflow usb_rx_leftover */ if (remaining_bytes + usb_rx_leftover_len > KVASER_USB_HYDRA_MAX_CMD_LEN) { dev_err(&dev->intf->dev, "Format error\n"); spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags); return; } memcpy(card_data->usb_rx_leftover + usb_rx_leftover_len, buf, remaining_bytes); pos += remaining_bytes; if (remaining_bytes + usb_rx_leftover_len == cmd_len) { kvaser_usb_hydra_handle_cmd(dev, cmd); usb_rx_leftover_len = 0; } else { /* Command still not complete */ usb_rx_leftover_len += remaining_bytes; } card_data->usb_rx_leftover_len = usb_rx_leftover_len; } spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags); while (pos < len) { cmd = buf + pos; cmd_len = kvaser_usb_hydra_cmd_size(cmd); if (pos + cmd_len > len) { /* We got first part of a command */ int leftover_bytes; leftover_bytes = len - pos; /* Make sure we do not overflow usb_rx_leftover */ if (leftover_bytes > KVASER_USB_HYDRA_MAX_CMD_LEN) { dev_err(&dev->intf->dev, "Format error\n"); return; } spin_lock_irqsave(usb_rx_leftover_lock, irq_flags); memcpy(card_data->usb_rx_leftover, buf + pos, leftover_bytes); card_data->usb_rx_leftover_len = leftover_bytes; spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags); break; } kvaser_usb_hydra_handle_cmd(dev, cmd); pos += cmd_len; } } static void * kvaser_usb_hydra_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *cmd_len, u16 transid) { void *buf; if (priv->dev->card_data.capabilities & KVASER_USB_HYDRA_CAP_EXT_CMD) buf = kvaser_usb_hydra_frame_to_cmd_ext(priv, skb, cmd_len, transid); else buf = kvaser_usb_hydra_frame_to_cmd_std(priv, skb, cmd_len, transid); return buf; } const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = { .dev_set_mode = kvaser_usb_hydra_set_mode, .dev_set_bittiming = kvaser_usb_hydra_set_bittiming, .dev_get_busparams = kvaser_usb_hydra_get_nominal_busparams, .dev_set_data_bittiming = kvaser_usb_hydra_set_data_bittiming, .dev_get_data_busparams = kvaser_usb_hydra_get_data_busparams, .dev_get_berr_counter = kvaser_usb_hydra_get_berr_counter, .dev_setup_endpoints = kvaser_usb_hydra_setup_endpoints, .dev_init_card = kvaser_usb_hydra_init_card, .dev_init_channel = kvaser_usb_hydra_init_channel, .dev_get_software_info = kvaser_usb_hydra_get_software_info, .dev_get_software_details = kvaser_usb_hydra_get_software_details, .dev_get_card_info = kvaser_usb_hydra_get_card_info, .dev_get_capabilities = kvaser_usb_hydra_get_capabilities, .dev_set_opt_mode = kvaser_usb_hydra_set_opt_mode, .dev_start_chip = kvaser_usb_hydra_start_chip, .dev_stop_chip = kvaser_usb_hydra_stop_chip, .dev_reset_chip = NULL, .dev_flush_queue = kvaser_usb_hydra_flush_queue, .dev_read_bulk_callback = kvaser_usb_hydra_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_hydra_frame_to_cmd, }; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = { .clock 
= { .freq = 80 * MEGA /* Hz */, }, .timestamp_freq = 80, .bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c, .data_bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c, }; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { .clock = { .freq = 24 * MEGA /* Hz */, }, .timestamp_freq = 1, .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt = { .clock = { .freq = 80 * MEGA /* Hz */, }, .timestamp_freq = 24, .bittiming_const = &kvaser_usb_hydra_rt_bittiming_c, .data_bittiming_const = &kvaser_usb_hydra_rtd_bittiming_c, }; |
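/*
 * Illustrative sketch, not part of the driver above: when the hardware
 * reports ERR_ACT, kvaser_usb_hydra_bus_status_to_can_state() falls back to
 * the classic CAN error-counter thresholds -- 96 and above maps to
 * ERROR_WARNING, 128 and above to ERROR_PASSIVE. Reduced to a stand-alone
 * helper with a hypothetical name, that mapping is:
 */
#if 0	/* example only, never built */
static enum can_state example_state_from_counters(u16 txerr, u16 rxerr)
{
	if (txerr >= 128 || rxerr >= 128)
		return CAN_STATE_ERROR_PASSIVE;
	if (txerr >= 96 || rxerr >= 96)
		return CAN_STATE_ERROR_WARNING;
	return CAN_STATE_ERROR_ACTIVE;
}
#endif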
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>

static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (state->pf) {
#ifdef CONFIG_NF_TABLES_IPV4
	case NFPROTO_IPV4:
		nft_set_pktinfo_ipv4(&pkt);
		break;
#endif
#ifdef CONFIG_NF_TABLES_IPV6
	case NFPROTO_IPV6:
		nft_set_pktinfo_ipv6(&pkt);
		break;
#endif
	default:
		break;
	}

	return nft_do_chain(&pkt, priv);
}

#ifdef CONFIG_NF_TABLES_IPV4
static const struct nft_chain_type nft_chain_nat_ipv4 = {
	.name = "nat",
	.type = NFT_CHAIN_T_NAT,
	.family = NFPROTO_IPV4,
	.owner = THIS_MODULE,
	.hook_mask = (1 << NF_INET_PRE_ROUTING) |
		     (1 << NF_INET_POST_ROUTING) |
		     (1 << NF_INET_LOCAL_OUT) |
		     (1 << NF_INET_LOCAL_IN),
	.hooks = {
		[NF_INET_PRE_ROUTING] = nft_nat_do_chain,
		[NF_INET_POST_ROUTING] = nft_nat_do_chain,
		[NF_INET_LOCAL_OUT] = nft_nat_do_chain,
		[NF_INET_LOCAL_IN] = nft_nat_do_chain,
	},
	.ops_register = nf_nat_ipv4_register_fn,
	.ops_unregister = nf_nat_ipv4_unregister_fn,
};
#endif

#ifdef CONFIG_NF_TABLES_IPV6
static const struct nft_chain_type nft_chain_nat_ipv6 = {
	.name = "nat",
	.type = NFT_CHAIN_T_NAT,
	.family = NFPROTO_IPV6,
	.owner = THIS_MODULE,
	.hook_mask = (1 << NF_INET_PRE_ROUTING) |
		     (1 << NF_INET_POST_ROUTING) |
		     (1 << NF_INET_LOCAL_OUT) |
		     (1 << NF_INET_LOCAL_IN),
	.hooks = {
		[NF_INET_PRE_ROUTING] = nft_nat_do_chain,
		[NF_INET_POST_ROUTING] = nft_nat_do_chain,
		[NF_INET_LOCAL_OUT] = nft_nat_do_chain,
		[NF_INET_LOCAL_IN] = nft_nat_do_chain,
	},
	.ops_register = nf_nat_ipv6_register_fn,
	.ops_unregister = nf_nat_ipv6_unregister_fn,
};
#endif

#ifdef CONFIG_NF_TABLES_INET
static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops)
{
	return nf_nat_inet_register_fn(net, ops);
}

static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops)
{
	nf_nat_inet_unregister_fn(net, ops);
}

static const struct nft_chain_type nft_chain_nat_inet = {
	.name = "nat",
	.type = NFT_CHAIN_T_NAT,
	.family = NFPROTO_INET,
	.owner = THIS_MODULE,
	.hook_mask = (1 << NF_INET_PRE_ROUTING) |
		     (1 << NF_INET_LOCAL_IN) |
		     (1 << NF_INET_LOCAL_OUT) |
		     (1 << NF_INET_POST_ROUTING),
	.hooks = {
		[NF_INET_PRE_ROUTING] = nft_nat_do_chain,
		[NF_INET_LOCAL_IN] = nft_nat_do_chain,
		[NF_INET_LOCAL_OUT] = nft_nat_do_chain,
		[NF_INET_POST_ROUTING] = nft_nat_do_chain,
	},
	.ops_register = nft_nat_inet_reg,
	.ops_unregister = nft_nat_inet_unreg,
};
#endif

static int __init nft_chain_nat_init(void)
{
#ifdef CONFIG_NF_TABLES_IPV6
	nft_register_chain_type(&nft_chain_nat_ipv6);
#endif
#ifdef CONFIG_NF_TABLES_IPV4
	nft_register_chain_type(&nft_chain_nat_ipv4);
#endif
#ifdef CONFIG_NF_TABLES_INET
	nft_register_chain_type(&nft_chain_nat_inet);
#endif

	return 0;
}

static void __exit nft_chain_nat_exit(void)
{
#ifdef CONFIG_NF_TABLES_IPV4
	nft_unregister_chain_type(&nft_chain_nat_ipv4);
#endif
#ifdef CONFIG_NF_TABLES_IPV6
	nft_unregister_chain_type(&nft_chain_nat_ipv6);
#endif
#ifdef CONFIG_NF_TABLES_INET
	nft_unregister_chain_type(&nft_chain_nat_inet);
#endif
}

module_init(nft_chain_nat_init);
module_exit(nft_chain_nat_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("nftables network address translation support");
#ifdef CONFIG_NF_TABLES_IPV4
MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
#endif
#ifdef CONFIG_NF_TABLES_IPV6
MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
#endif
#ifdef CONFIG_NF_TABLES_INET
MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */
#endif
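/*
 * Illustrative sketch, not part of the module above: each chain type
 * registered here follows the same pattern -- a const struct nft_chain_type
 * carrying a hook mask and per-hook callbacks, registered from module init
 * and removed on exit. A condensed skeleton with hypothetical names, reusing
 * the real nf_nat_ipv4_register_fn()/nf_nat_ipv4_unregister_fn() helpers and
 * the nft_nat_do_chain() hook seen above, could look like this:
 */
#if 0	/* example only, never built */
static const struct nft_chain_type example_chain_nat = {
	.name = "nat",
	.type = NFT_CHAIN_T_NAT,
	.family = NFPROTO_IPV4,
	.owner = THIS_MODULE,
	.hook_mask = (1 << NF_INET_PRE_ROUTING) |
		     (1 << NF_INET_POST_ROUTING),
	.hooks = {
		[NF_INET_PRE_ROUTING] = nft_nat_do_chain,
		[NF_INET_POST_ROUTING] = nft_nat_do_chain,
	},
	.ops_register = nf_nat_ipv4_register_fn,
	.ops_unregister = nf_nat_ipv4_unregister_fn,
};

static int __init example_init(void)
{
	nft_register_chain_type(&example_chain_nat);
	return 0;
}
#endif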
/*
 * Copyright (C) 2017 Netronome Systems, Inc.
 *
 * This software is licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
 * source tree.
 *
 * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
 * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
 * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
 * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
 */

#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/rtnetlink.h>
#include <net/pkt_cls.h>

#include "netdevsim.h"

#define pr_vlog(env, fmt, ...)
\ bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__) struct nsim_bpf_bound_prog { struct nsim_dev *nsim_dev; struct bpf_prog *prog; struct dentry *ddir; const char *state; bool is_loaded; struct list_head l; }; #define NSIM_BPF_MAX_KEYS 2 struct nsim_bpf_bound_map { struct netdevsim *ns; struct bpf_offloaded_map *map; struct mutex mutex; struct nsim_map_entry { void *key; void *value; } entry[NSIM_BPF_MAX_KEYS]; struct list_head l; }; static int nsim_bpf_string_show(struct seq_file *file, void *data) { const char **str = file->private; if (*str) seq_printf(file, "%s\n", *str); return 0; } DEFINE_SHOW_ATTRIBUTE(nsim_bpf_string); static int nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) { struct nsim_bpf_bound_prog *state; int ret = 0; state = env->prog->aux->offload->dev_priv; if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx) msleep(state->nsim_dev->bpf_bind_verifier_delay); if (insn_idx == env->prog->len - 1) { pr_vlog(env, "Hello from netdevsim!\n"); if (!state->nsim_dev->bpf_bind_verifier_accept) ret = -EOPNOTSUPP; } return ret; } static int nsim_bpf_finalize(struct bpf_verifier_env *env) { return 0; } static bool nsim_xdp_offload_active(struct netdevsim *ns) { return ns->xdp_hw.prog; } static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) { struct nsim_bpf_bound_prog *state; if (!prog || !bpf_prog_is_offloaded(prog->aux)) return; state = prog->aux->offload->dev_priv; state->is_loaded = loaded; } static int nsim_bpf_offload(struct netdevsim *ns, struct bpf_prog *prog, bool oldprog) { nsim_prog_set_loaded(ns->bpf_offloaded, false); WARN(!!ns->bpf_offloaded != oldprog, "bad offload state, expected offload %sto be active", oldprog ? "" : "not "); ns->bpf_offloaded = prog; ns->bpf_offloaded_id = prog ? 
prog->aux->id : 0; nsim_prog_set_loaded(prog, true); return 0; } int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct tc_cls_bpf_offload *cls_bpf = type_data; struct bpf_prog *prog = cls_bpf->prog; struct netdevsim *ns = cb_priv; struct bpf_prog *oldprog; if (type != TC_SETUP_CLSBPF) { NSIM_EA(cls_bpf->common.extack, "only offload of BPF classifiers supported"); return -EOPNOTSUPP; } if (!tc_cls_can_offload_and_chain0(ns->netdev, &cls_bpf->common)) return -EOPNOTSUPP; if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { NSIM_EA(cls_bpf->common.extack, "only ETH_P_ALL supported as filter protocol"); return -EOPNOTSUPP; } if (!ns->bpf_tc_accept) { NSIM_EA(cls_bpf->common.extack, "netdevsim configured to reject BPF TC offload"); return -EOPNOTSUPP; } /* Note: progs without skip_sw will probably not be dev bound */ if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) { NSIM_EA(cls_bpf->common.extack, "netdevsim configured to reject unbound programs"); return -EOPNOTSUPP; } if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; oldprog = cls_bpf->oldprog; /* Don't remove if oldprog doesn't match driver's state */ if (ns->bpf_offloaded != oldprog) { oldprog = NULL; if (!cls_bpf->prog) return 0; if (ns->bpf_offloaded) { NSIM_EA(cls_bpf->common.extack, "driver and netdev offload states mismatch"); return -EBUSY; } } return nsim_bpf_offload(ns, cls_bpf->prog, oldprog); } int nsim_bpf_disable_tc(struct netdevsim *ns) { if (ns->bpf_offloaded && !nsim_xdp_offload_active(ns)) return -EBUSY; return 0; } static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf) { if (!nsim_xdp_offload_active(ns) && !bpf->prog) return 0; if (!nsim_xdp_offload_active(ns) && bpf->prog && ns->bpf_offloaded) { NSIM_EA(bpf->extack, "TC program is already loaded"); return -EBUSY; } return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns)); } static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, struct xdp_attachment_info *xdp) { int err; if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); return -EOPNOTSUPP; } if (bpf->command == XDP_SETUP_PROG_HW && !ns->bpf_xdpoffload_accept) { NSIM_EA(bpf->extack, "XDP offload disabled in DebugFS"); return -EOPNOTSUPP; } if (bpf->command == XDP_SETUP_PROG_HW) { err = nsim_xdp_offload_prog(ns, bpf); if (err) return err; } xdp_attachment_setup(xdp, bpf); return 0; } static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; char name[16]; int ret; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; state->nsim_dev = nsim_dev; state->prog = prog; state->state = "verify"; /* Program id is not populated yet when we create the state. 
*/ sprintf(name, "%u", nsim_dev->prog_id_gen++); state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs); if (IS_ERR(state->ddir)) { ret = PTR_ERR(state->ddir); kfree(state); return ret; } debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id); debugfs_create_file("state", 0400, state->ddir, &state->state, &nsim_bpf_string_fops); debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded); list_add_tail(&state->l, &nsim_dev->bpf_bound_progs); prog->aux->offload->dev_priv = state; return 0; } static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { struct nsim_dev *nsim_dev = bpf_offload_dev_priv(prog->aux->offload->offdev); if (!nsim_dev->bpf_bind_accept) return -EOPNOTSUPP; return nsim_bpf_create_prog(nsim_dev, prog); } static int nsim_bpf_translate(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; state->state = "xlated"; return 0; } static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; state = prog->aux->offload->dev_priv; WARN(state->is_loaded, "offload state destroyed while program still bound"); debugfs_remove_recursive(state->ddir); list_del(&state->l); kfree(state); } static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, .translate = nsim_bpf_translate, .destroy = nsim_bpf_destroy_prog, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { if (bpf->prog && bpf->prog->aux->offload) { NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv"); return -EINVAL; } if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) { NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); return -EINVAL; } return 0; } static int nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { struct nsim_bpf_bound_prog *state; if (!bpf->prog) return 0; if (!bpf_prog_is_offloaded(bpf->prog->aux)) { NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } state = bpf->prog->aux->offload->dev_priv; if (WARN_ON(strcmp(state->state, "xlated"))) { NSIM_EA(bpf->extack, "offloading program in bad state"); return -EINVAL; } return 0; } static bool nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) { return e->key && !memcmp(key, e->key, map->key_size); } static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; unsigned int i; for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) return i; return -ENOENT; } static int nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; return -ENOMEM; } return 0; } static int nsim_map_get_next_key(struct bpf_offloaded_map *offmap, void *key, void *next_key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx = -ENOENT; mutex_lock(&nmap->mutex); if (key) idx = nsim_map_key_find(offmap, key); if (idx == -ENOENT) idx = 0; else idx++; for (; idx < ARRAY_SIZE(nmap->entry); idx++) { if (nmap->entry[idx].key) { memcpy(next_key, nmap->entry[idx].key, offmap->map.key_size); break; } } 
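	/* Falling off the end of entry[] above means no later key exists;
	 * that case is turned into -ENOENT once the mutex is dropped.
	 */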
mutex_unlock(&nmap->mutex); if (idx == ARRAY_SIZE(nmap->entry)) return -ENOENT; return 0; } static int nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx >= 0) memcpy(value, nmap->entry[idx].value, offmap->map.value_size); mutex_unlock(&nmap->mutex); return idx < 0 ? idx : 0; } static int nsim_map_update_elem(struct bpf_offloaded_map *offmap, void *key, void *value, u64 flags) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx, err = 0; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx < 0 && flags == BPF_EXIST) { err = idx; goto exit_unlock; } if (idx >= 0 && flags == BPF_NOEXIST) { err = -EEXIST; goto exit_unlock; } if (idx < 0) { for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) if (!nmap->entry[idx].key) break; if (idx == ARRAY_SIZE(nmap->entry)) { err = -E2BIG; goto exit_unlock; } err = nsim_map_alloc_elem(offmap, idx); if (err) goto exit_unlock; } memcpy(nmap->entry[idx].key, key, offmap->map.key_size); memcpy(nmap->entry[idx].value, value, offmap->map.value_size); exit_unlock: mutex_unlock(&nmap->mutex); return err; } static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx; if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) return -EINVAL; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx >= 0) { kfree(nmap->entry[idx].key); kfree(nmap->entry[idx].value); memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); } mutex_unlock(&nmap->mutex); return idx < 0 ? idx : 0; } static const struct bpf_map_dev_ops nsim_bpf_map_ops = { .map_get_next_key = nsim_map_get_next_key, .map_lookup_elem = nsim_map_lookup_elem, .map_update_elem = nsim_map_update_elem, .map_delete_elem = nsim_map_delete_elem, }; static int nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) { struct nsim_bpf_bound_map *nmap; int i, err; if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY && offmap->map.map_type != BPF_MAP_TYPE_HASH)) return -EINVAL; if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS) return -ENOMEM; if (offmap->map.map_flags) return -EINVAL; nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; offmap->dev_priv = nmap; nmap->ns = ns; nmap->map = offmap; mutex_init(&nmap->mutex); if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) { for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { u32 *key; err = nsim_map_alloc_elem(offmap, i); if (err) goto err_free; key = nmap->entry[i].key; *key = i; memset(nmap->entry[i].value, 0, offmap->map.value_size); } } offmap->dev_ops = &nsim_bpf_map_ops; list_add_tail(&nmap->l, &ns->nsim_dev->bpf_bound_maps); return 0; err_free: while (--i >= 0) { kfree(nmap->entry[i].key); kfree(nmap->entry[i].value); } kfree(nmap); return err; } static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; unsigned int i; for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { kfree(nmap->entry[i].key); kfree(nmap->entry[i].value); } list_del_init(&nmap->l); mutex_destroy(&nmap->mutex); kfree(nmap); } int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct netdevsim *ns = netdev_priv(dev); int err; ASSERT_RTNL(); switch (bpf->command) { case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) return err; return nsim_xdp_set_prog(ns, bpf, &ns->xdp); case XDP_SETUP_PROG_HW: err = 
nsim_setup_prog_hw_checks(ns, bpf); if (err) return err; return nsim_xdp_set_prog(ns, bpf, &ns->xdp_hw); case BPF_OFFLOAD_MAP_ALLOC: if (!ns->bpf_map_accept) return -EOPNOTSUPP; return nsim_bpf_map_alloc(ns, bpf->offmap); case BPF_OFFLOAD_MAP_FREE: nsim_bpf_map_free(bpf->offmap); return 0; default: return -EINVAL; } } int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) { int err; INIT_LIST_HEAD(&nsim_dev->bpf_bound_progs); INIT_LIST_HEAD(&nsim_dev->bpf_bound_maps); nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs", nsim_dev->ddir); if (IS_ERR(nsim_dev->ddir_bpf_bound_progs)) return PTR_ERR(nsim_dev->ddir_bpf_bound_progs); nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev); err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev); if (err) return err; nsim_dev->bpf_bind_accept = true; debugfs_create_bool("bpf_bind_accept", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_accept); debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_delay); nsim_dev->bpf_bind_verifier_accept = true; debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_accept); return 0; } void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev) { WARN_ON(!list_empty(&nsim_dev->bpf_bound_progs)); WARN_ON(!list_empty(&nsim_dev->bpf_bound_maps)); bpf_offload_dev_destroy(nsim_dev->bpf_dev); } int nsim_bpf_init(struct netdevsim *ns) { struct dentry *ddir = ns->nsim_dev_port->ddir; int err; err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev, ns->netdev); if (err) return err; debugfs_create_u32("bpf_offloaded_id", 0400, ddir, &ns->bpf_offloaded_id); ns->bpf_tc_accept = true; debugfs_create_bool("bpf_tc_accept", 0600, ddir, &ns->bpf_tc_accept); debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ddir, &ns->bpf_tc_non_bound_accept); ns->bpf_xdpdrv_accept = true; debugfs_create_bool("bpf_xdpdrv_accept", 0600, ddir, &ns->bpf_xdpdrv_accept); ns->bpf_xdpoffload_accept = true; debugfs_create_bool("bpf_xdpoffload_accept", 0600, ddir, &ns->bpf_xdpoffload_accept); ns->bpf_map_accept = true; debugfs_create_bool("bpf_map_accept", 0600, ddir, &ns->bpf_map_accept); return 0; } void nsim_bpf_uninit(struct netdevsim *ns) { WARN_ON(ns->xdp.prog); WARN_ON(ns->xdp_hw.prog); WARN_ON(ns->bpf_offloaded); bpf_offload_dev_netdev_unregister(ns->nsim_dev->bpf_dev, ns->netdev); } |
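/*
 * Illustrative sketch, not part of netdevsim: nsim_map_update_elem() above
 * enforces the standard BPF update-flag semantics before it touches a slot.
 * Reduced to just that decision, with a hypothetical helper name:
 */
#if 0	/* example only, never built */
static int example_check_update_flags(bool key_exists, u64 flags)
{
	if (!key_exists && flags == BPF_EXIST)
		return -ENOENT;		/* caller required an existing entry */
	if (key_exists && flags == BPF_NOEXIST)
		return -EEXIST;		/* caller required a fresh entry */
	return 0;			/* BPF_ANY, or the flag matches reality */
}
#endif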
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
*/ #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/if.h> #include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/sch_generic.h> #include <linux/netfilter.h> #include <rdma/ib_addr.h> #include "rxe.h" #include "rxe_net.h" #include "rxe_loc.h" static struct rxe_recv_sockets recv_sockets; static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, struct net_device *ndev, struct in_addr *saddr, struct in_addr *daddr) { struct rtable *rt; struct flowi4 fl = { { 0 } }; memset(&fl, 0, sizeof(fl)); fl.flowi4_oif = ndev->ifindex; memcpy(&fl.saddr, saddr, sizeof(*saddr)); memcpy(&fl.daddr, daddr, sizeof(*daddr)); fl.flowi4_proto = IPPROTO_UDP; rt = ip_route_output_key(&init_net, &fl); if (IS_ERR(rt)) { rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; } return &rt->dst; } #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { struct dst_entry *ndst; struct flowi6 fl6 = { { 0 } }; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = ndev->ifindex; memcpy(&fl6.saddr, saddr, sizeof(*saddr)); memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), recv_sockets.sk6->sk, &fl6, NULL); if (IS_ERR(ndst)) { rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; } if (unlikely(ndst->error)) { rxe_dbg_qp(qp, "no route to %pI6\n", daddr); goto put; } return ndst; put: dst_release(ndst); return NULL; } #else static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { return NULL; } #endif static struct dst_entry *rxe_find_route(struct net_device *ndev, struct rxe_qp *qp, struct rxe_av *av) { struct dst_entry *dst = NULL; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); if (!dst || !dst_check(dst, qp->dst_cookie)) { if (dst) dst_release(dst); if (av->network_type == RXE_NETWORK_TYPE_IPV4) { struct in_addr *saddr; struct in_addr *daddr; saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route4(qp, ndev, saddr, daddr); } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; dst = rxe_find_route6(qp, ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); #endif } if (dst && (qp_type(qp) == IB_QPT_RC)) { dst_hold(dst); sk_dst_set(qp->sk->sk, dst); } } return dst; } static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct rxe_dev *rxe; struct net_device *ndev = skb->dev; struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); /* takes a reference on rxe->ib_dev * drop when skb is freed */ rxe = rxe_get_dev_from_net(ndev); if (!rxe && is_vlan_dev(ndev)) rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); if (!rxe) goto drop; if (skb_linearize(skb)) { ib_device_put(&rxe->ib_dev); goto drop; } udph = udp_hdr(skb); pkt->rxe = rxe; pkt->port_num = 1; pkt->hdr = (u8 *)(udph + 1); pkt->mask = RXE_GRH_MASK; pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); /* remove udp header */ skb_pull(skb, sizeof(struct udphdr)); rxe_rcv(skb); return 0; drop: kfree_skb(skb); return 0; } static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, bool ipv6) { int err; struct 
socket *sock; struct udp_port_cfg udp_cfg = { }; struct udp_tunnel_sock_cfg tnl_cfg = { }; if (ipv6) { udp_cfg.family = AF_INET6; udp_cfg.ipv6_v6only = 1; } else { udp_cfg.family = AF_INET; } udp_cfg.local_udp_port = port; /* Create UDP socket */ err = udp_sock_create(net, &udp_cfg, &sock); if (err < 0) return ERR_PTR(err); tnl_cfg.encap_type = 1; tnl_cfg.encap_rcv = rxe_udp_encap_recv; /* Setup UDP tunnel */ setup_udp_tunnel_sock(net, sock, &tnl_cfg); return sock; } static void rxe_release_udp_tunnel(struct socket *sk) { if (sk) udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, __be16 dst_port) { struct udphdr *udph; __skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); udph = udp_hdr(skb); udph->dest = dst_port; udph->source = src_port; udph->len = htons(skb->len); udph->check = 0; } static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, __be32 saddr, __be32 daddr, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { struct iphdr *iph; skb_scrub_packet(skb, xnet); skb_clear_hash(skb); skb_dst_set(skb, dst_clone(dst)); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; iph->daddr = daddr; iph->saddr = saddr; iph->ttl = ttl; __ip_select_ident(dev_net(dst->dev), iph, skb_shinfo(skb)->gso_segs ?: 1); } static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, __u8 proto, __u8 prio, __u8 ttl) { struct ipv6hdr *ip6h; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_set(skb, dst_clone(dst)); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); ip6_flow_hdr(ip6h, prio, htonl(0)); ip6h->payload_len = htons(skb->len); ip6h->nexthdr = proto; ip6h->hop_limit = ttl; ip6h->daddr = *daddr; ip6h->saddr = *saddr; ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route(skb->dev, qp, av); if (!dst) { rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), cpu_to_be16(ROCE_V2_UDP_DPORT)); prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit, df, xnet); dst_release(dst); return 0; } static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; dst = rxe_find_route(skb->dev, qp, av); if (!dst) { rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), cpu_to_be16(ROCE_V2_UDP_DPORT)); prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, av->grh.traffic_class, av->grh.hop_limit); dst_release(dst); return 0; } int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, struct sk_buff *skb) { int err = 0; if 
(skb->protocol == htons(ETH_P_IP)) err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) err = prepare6(av, pkt, skb); if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; } static void rxe_skb_tx_dtor(struct sk_buff *skb) { struct net_device *ndev = skb->dev; struct rxe_dev *rxe; unsigned int qp_index; struct rxe_qp *qp; int skb_out; rxe = rxe_get_dev_from_net(ndev); if (!rxe && is_vlan_dev(ndev)) rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); if (WARN_ON(!rxe)) return; qp_index = (int)(uintptr_t)skb->sk->sk_user_data; if (!qp_index) return; qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); if (!qp) goto put_dev; skb_out = atomic_dec_return(&qp->skb_out); if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) rxe_sched_task(&qp->send_task); rxe_put(qp); put_dev: ib_device_put(&rxe->ib_dev); sock_put(skb->sk); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) { int err; struct sock *sk = pkt->qp->sk->sk; sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); else err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); return err; } /* fix up a send packet to match the packets * received from UDP before looping them back */ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) { struct sock *sk = pkt->qp->sk->sk; memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) skb_pull(skb, sizeof(struct iphdr)); else skb_pull(skb, sizeof(struct ipv6hdr)); if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) { kfree_skb(skb); return -EIO; } /* remove udp header */ skb_pull(skb, sizeof(struct udphdr)); rxe_rcv(skb); return 0; } int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb) { int err; int is_request = pkt->mask & RXE_REQ_MASK; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); unsigned long flags; spin_lock_irqsave(&qp->state_lock, flags); if ((is_request && (qp_state(qp) < IB_QPS_RTS)) || (!is_request && (qp_state(qp) < IB_QPS_RTR))) { spin_unlock_irqrestore(&qp->state_lock, flags); rxe_dbg_qp(qp, "Packet dropped. 
QP is not in ready state\n"); goto drop; } spin_unlock_irqrestore(&qp->state_lock, flags); rxe_icrc_generate(skb, pkt); if (pkt->mask & RXE_LOOPBACK_MASK) err = rxe_loopback(skb, pkt); else err = rxe_send(skb, pkt); if (err) { rxe_counter_inc(rxe, RXE_CNT_SEND_ERR); return err; } rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); goto done; drop: kfree_skb(skb); err = 0; done: return err; } struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt) { unsigned int hdr_len; struct sk_buff *skb = NULL; struct net_device *ndev; const struct ib_gid_attr *attr; const int port_num = 1; attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); if (IS_ERR(attr)) return NULL; if (av->network_type == RXE_NETWORK_TYPE_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct iphdr); else hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct ipv6hdr); rcu_read_lock(); ndev = rdma_read_gid_attr_ndev_rcu(attr); if (IS_ERR(ndev)) { rcu_read_unlock(); goto out; } skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); if (unlikely(!skb)) { rcu_read_unlock(); goto out; } skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); /* FIXME: hold reference to this netdev until life of this skb. */ skb->dev = ndev; rcu_read_unlock(); if (av->network_type == RXE_NETWORK_TYPE_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); pkt->rxe = rxe; pkt->port_num = port_num; pkt->hdr = skb_put(skb, paylen); pkt->mask |= RXE_GRH_MASK; out: rdma_put_gid_attr(attr); return skb; } /* * this is required by rxe_cfg to match rxe devices in * /sys/class/infiniband up with their underlying ethernet devices */ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) { struct net_device *ndev; char *ndev_name; ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); if (!ndev) return NULL; ndev_name = ndev->name; dev_put(ndev); return ndev_name; } int rxe_net_add(const char *ibdev_name, struct net_device *ndev) { int err; struct rxe_dev *rxe = NULL; rxe = ib_alloc_device(rxe_dev, ib_dev); if (!rxe) return -ENOMEM; ib_mark_name_assigned_by_user(&rxe->ib_dev); err = rxe_add(rxe, ndev->mtu, ibdev_name, ndev); if (err) { ib_dealloc_device(&rxe->ib_dev); return err; } return 0; } static void rxe_port_event(struct rxe_dev *rxe, enum ib_event_type event) { struct ib_event ev; ev.device = &rxe->ib_dev; ev.element.port_num = 1; ev.event = event; ib_dispatch_event(&ev); } /* Caller must hold net_info_lock */ void rxe_port_up(struct rxe_dev *rxe) { rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); dev_info(&rxe->ib_dev.dev, "set active\n"); } /* Caller must hold net_info_lock */ void rxe_port_down(struct rxe_dev *rxe) { rxe_port_event(rxe, IB_EVENT_PORT_ERR); rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); dev_info(&rxe->ib_dev.dev, "set down\n"); } void rxe_set_port_state(struct rxe_dev *rxe) { struct net_device *ndev; ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); if (!ndev) return; if (ib_get_curr_port_state(ndev) == IB_PORT_ACTIVE) rxe_port_up(rxe); else rxe_port_down(rxe); dev_put(ndev); } static int rxe_notify(struct notifier_block *not_blk, unsigned long event, void *arg) { struct net_device *ndev = netdev_notifier_info_to_dev(arg); struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); if (!rxe) return NOTIFY_OK; switch (event) { case NETDEV_UNREGISTER: ib_unregister_device_queued(&rxe->ib_dev); break; case NETDEV_CHANGEMTU: rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_DOWN: 
case NETDEV_CHANGE: if (ib_get_curr_port_state(ndev) == IB_PORT_DOWN) rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); break; case NETDEV_REBOOT: case NETDEV_GOING_DOWN: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: rxe_dbg_dev(rxe, "ignoring netdev event = %ld for %s\n", event, ndev->name); break; } ib_device_put(&rxe->ib_dev); return NOTIFY_OK; } static struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; static int rxe_net_ipv4_init(void) { recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { recv_sockets.sk4 = NULL; pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } return 0; } static int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { recv_sockets.sk6 = NULL; pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n"); return 0; } if (IS_ERR(recv_sockets.sk6)) { recv_sockets.sk6 = NULL; pr_err("Failed to create IPv6 UDP tunnel\n"); return -1; } #endif return 0; } void rxe_net_exit(void) { rxe_release_udp_tunnel(recv_sockets.sk6); rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } int rxe_net_init(void) { int err; recv_sockets.sk6 = NULL; err = rxe_net_ipv4_init(); if (err) return err; err = rxe_net_ipv6_init(); if (err) goto err_out; err = register_netdevice_notifier(&rxe_net_notifier); if (err) { pr_err("Failed to register netdev notifier\n"); goto err_out; } return 0; err_out: rxe_net_exit(); return err; } |
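/*
 * Illustrative userspace sketch (not part of the rxe sources): it mirrors the
 * header layering that prepare_udp_hdr()/prepare_ipv4_hdr() above set up --
 * a RoCEv2 frame travels as an ordinary UDP datagram whose destination port
 * is 4791 (ROCE_V2_UDP_DPORT) and whose checksum rxe leaves at zero.  The
 * buffer, payload size and source port used here are hypothetical.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct udp_hdr_sketch {
	uint16_t source;	/* UDP source port, network byte order */
	uint16_t dest;		/* UDP destination port, network byte order */
	uint16_t len;		/* header + payload length */
	uint16_t check;		/* checksum; rxe does not compute it */
};

int main(void)
{
	unsigned char frame[1500];
	size_t paylen = 256;			/* hypothetical BTH + payload size */
	struct udp_hdr_sketch udph;

	udph.source = htons(0xC000);		/* rxe uses the per-QP qp->src_port here */
	udph.dest   = htons(4791);		/* ROCE_V2_UDP_DPORT */
	udph.len    = htons(sizeof(udph) + paylen);
	udph.check  = 0;			/* as in prepare_udp_hdr() */

	memcpy(frame, &udph, sizeof(udph));	/* header precedes the payload */
	printf("UDP dport %u, datagram length %u\n",
	       (unsigned)ntohs(udph.dest), (unsigned)ntohs(udph.len));
	return 0;
}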
2 2 1 6 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_PKT_SCHED_H #define __NET_PKT_SCHED_H #include <linux/jiffies.h> #include <linux/ktime.h> #include <linux/if_vlan.h> #include <linux/netdevice.h> #include <net/sch_generic.h> #include <net/net_namespace.h> #include <uapi/linux/pkt_sched.h> #define DEFAULT_TX_QUEUE_LEN 1000 #define STAB_SIZE_LOG_MAX 30 struct qdisc_walker { int stop; int skip; int count; int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); }; #define qdisc_priv(q) \ _Generic(q, \ const struct Qdisc * : (const void *)&q->privdata, \ struct Qdisc * : (void *)&q->privdata) static inline struct Qdisc *qdisc_from_priv(void *priv) { return container_of(priv, struct Qdisc, privdata); } /* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth Normal IP packet size ~ 512byte, hence: 0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for 10Mbit ethernet. 10msec resolution -> <50Kbit/sec. The result: [34]86 is not good choice for QoS router :-( The things are not so bad, because we may use artificial clock evaluated by integration of network data flow in the most critical places. 
*/ typedef u64 psched_time_t; typedef long psched_tdiff_t; /* Avoid doing 64 bit divide */ #define PSCHED_SHIFT 6 #define PSCHED_TICKS2NS(x) ((s64)(x) << PSCHED_SHIFT) #define PSCHED_NS2TICKS(x) ((x) >> PSCHED_SHIFT) #define PSCHED_TICKS_PER_SEC PSCHED_NS2TICKS(NSEC_PER_SEC) #define PSCHED_PASTPERFECT 0 static inline psched_time_t psched_get_time(void) { return PSCHED_NS2TICKS(ktime_get_ns()); } struct qdisc_watchdog { struct hrtimer timer; struct Qdisc *qdisc; }; void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns); static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL); } static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires)); } void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; extern struct Qdisc_ops pfifo_head_drop_qdisc_ops; int fifo_set_limit(struct Qdisc *q, unsigned int limit); struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, unsigned int limit, struct netlink_ext_ack *extack); int register_qdisc(struct Qdisc_ops *qops); void unregister_qdisc(struct Qdisc_ops *qops); #define NET_SCH_ALIAS_PREFIX "net-sch-" #define MODULE_ALIAS_NET_SCH(id) MODULE_ALIAS(NET_SCH_ALIAS_PREFIX id) void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { __qdisc_run(q); qdisc_run_end(q); } } extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. 
*/ static inline unsigned int psched_mtu(const struct net_device *dev) { return READ_ONCE(dev->mtu) + dev->hard_header_len; } static inline struct net *qdisc_net(struct Qdisc *q) { return dev_net(q->dev_queue->dev); } struct tc_query_caps_base { enum tc_setup_type type; void *caps; }; struct tc_cbs_qopt_offload { u8 enable; s32 queue; s32 hicredit; s32 locredit; s32 idleslope; s32 sendslope; }; struct tc_etf_qopt_offload { u8 enable; s32 queue; }; struct tc_mqprio_caps { bool validate_queue_counts:1; }; struct tc_mqprio_qopt_offload { /* struct tc_mqprio_qopt must always be the first element */ struct tc_mqprio_qopt qopt; struct netlink_ext_ack *extack; u16 mode; u16 shaper; u32 flags; u64 min_rate[TC_QOPT_MAX_QUEUE]; u64 max_rate[TC_QOPT_MAX_QUEUE]; unsigned long preemptible_tcs; }; struct tc_taprio_caps { bool supports_queue_max_sdu:1; bool gate_mask_per_txq:1; /* Device expects lower TXQ numbers to have higher priority over higher * TXQs, regardless of their TC mapping. DO NOT USE FOR NEW DRIVERS, * INSTEAD ENFORCE A PROPER TC:TXQ MAPPING COMING FROM USER SPACE. */ bool broken_mqprio:1; }; enum tc_taprio_qopt_cmd { TAPRIO_CMD_REPLACE, TAPRIO_CMD_DESTROY, TAPRIO_CMD_STATS, TAPRIO_CMD_QUEUE_STATS, }; /** * struct tc_taprio_qopt_stats - IEEE 802.1Qbv statistics * @window_drops: Frames that were dropped because they were too large to be * transmitted in any of the allotted time windows (open gates) for their * traffic class. * @tx_overruns: Frames still being transmitted by the MAC after the * transmission gate associated with their traffic class has closed. * Equivalent to `12.29.1.1.2 TransmissionOverrun` from 802.1Q-2018. */ struct tc_taprio_qopt_stats { u64 window_drops; u64 tx_overruns; }; struct tc_taprio_qopt_queue_stats { int queue; struct tc_taprio_qopt_stats stats; }; struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ /* The gate_mask in the offloading side refers to traffic classes */ u32 gate_mask; u32 interval; }; struct tc_taprio_qopt_offload { enum tc_taprio_qopt_cmd cmd; union { /* TAPRIO_CMD_STATS */ struct tc_taprio_qopt_stats stats; /* TAPRIO_CMD_QUEUE_STATS */ struct tc_taprio_qopt_queue_stats queue_stats; /* TAPRIO_CMD_REPLACE */ struct { struct tc_mqprio_qopt_offload mqprio; struct netlink_ext_ack *extack; ktime_t base_time; u64 cycle_time; u64 cycle_time_extension; u32 max_sdu[TC_MAX_QUEUE]; size_t num_entries; struct tc_taprio_sched_entry entries[]; }; }; }; #if IS_ENABLED(CONFIG_NET_SCH_TAPRIO) /* Reference counting */ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload *offload); void taprio_offload_free(struct tc_taprio_qopt_offload *offload); #else /* Reference counting */ static inline struct tc_taprio_qopt_offload * taprio_offload_get(struct tc_taprio_qopt_offload *offload) { return NULL; } static inline void taprio_offload_free(struct tc_taprio_qopt_offload *offload) { } #endif /* Ensure skb_mstamp_ns, which might have been populated with the txtime, is * not mistaken for a software timestamp, because this will otherwise prevent * the dispatch of hardware timestamps to the socket. */ static inline void skb_txtime_consumed(struct sk_buff *skb) { skb->tstamp = ktime_set(0, 0); } static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, unsigned long cl, struct qdisc_walker *arg) { if (arg->count >= arg->skip && arg->fn(sch, cl, arg) < 0) { arg->stop = 1; return false; } arg->count++; return true; } #endif |
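/*
 * Standalone sketch of the psched tick arithmetic defined above: with
 * PSCHED_SHIFT = 6, one tick is 2^6 = 64ns.  The two macros are copied from
 * the header so the example builds outside the kernel; the packet size and
 * rate are arbitrary, chosen only to give a round result.
 */
#include <stdint.h>
#include <stdio.h>

#define PSCHED_SHIFT		6
#define PSCHED_TICKS2NS(x)	((int64_t)(x) << PSCHED_SHIFT)
#define PSCHED_NS2TICKS(x)	((x) >> PSCHED_SHIFT)

int main(void)
{
	uint64_t bytes = 1500;				/* full-size Ethernet payload */
	uint64_t rate_bps = 100ULL * 1000 * 1000;	/* 100 Mbit/s */

	/* time on the wire in nanoseconds: bits / (bits per second) */
	uint64_t ns = bytes * 8ULL * 1000000000ULL / rate_bps;	/* 120000 ns */
	uint64_t ticks = PSCHED_NS2TICKS(ns);			/* 120000 >> 6 = 1875 */

	printf("%llu ns -> %llu ticks -> %lld ns (64 ns granularity)\n",
	       (unsigned long long)ns, (unsigned long long)ticks,
	       (long long)PSCHED_TICKS2NS(ticks));
	return 0;
}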
1 22 1 2 23 23 52 41 18 53 52 53 42 18 52 8 40 14 22 8 8 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | // SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * * Authors: * John McCutchan <ttb@tentacle.dhs.org> * Robert Love <rml@novell.com> * * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/inotify.h> #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/sched/mm.h> #include "inotify.h" /* * Check if 2 events contain the same information. */ static bool event_compare(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { struct inotify_event_info *old, *new; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && (old->wd == new->wd) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; return false; } static int inotify_merge(struct fsnotify_group *group, struct fsnotify_event *event) { struct list_head *list = &group->notification_list; struct fsnotify_event *last_event; last_event = list_entry(list->prev, struct fsnotify_event, list); return event_compare(last_event, event); } int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; if (name) { len = name->len; alloc_len += len + 1; } pr_debug("%s: group=%p mark=%p mask=%x\n", __func__, group, inode_mark, mask); i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); /* * We can be racing with mark being detached. Don't report event with * invalid wd. */ wd = READ_ONCE(i_mark->wd); if (wd == -1) return 0; /* * Whoever is interested in the event, pays for the allocation. Do not * trigger OOM killer in the target monitoring memcg as it may have * security repercussion. */ old_memcg = set_active_memcg(group->memcg); event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); set_active_memcg(old_memcg); if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue * overflow to let userspace know event was lost. 
*/ fsnotify_queue_overflow(group); return -ENOMEM; } /* * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events * for fanotify. inotify never reported IN_ISDIR with those events. * It looks like an oversight, but to avoid the risk of breaking * existing inotify programs, mask the flag out from those events. */ if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) mask &= ~IN_ISDIR; fsn_event = &event->fse; fsnotify_init_event(fsn_event); event->mask = mask; event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) strscpy(event->name, name->name, event->name_len + 1); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); } if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; } static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); } /* * This is NEVER supposed to be called. Inotify marks should either have been * removed from the idr when the watch was removed or in the * fsnotify_destroy_mark_by_group() call when the inotify instance was being * torn down. This is only called if the idr is about to be freed but there * are still marks in it. */ static int idr_callback(int id, void *p, void *data) { struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; static bool warned = false; if (warned) return 0; warned = true; fsn_mark = p; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in " "idr. Probably leaking memory\n", id, p, data); /* * I'm taking the liberty of assuming that the mark in question is a * valid address and I'm dereferencing it. This might help to figure * out why we got here and the panic is no worse than the original * BUG() that was here. */ if (fsn_mark) printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", fsn_mark->group, i_mark->wd); return 0; } static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); if (group->inotify_data.ucounts) dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } /* ding dong the mark is dead */ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) { struct inotify_inode_mark *i_mark; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); kmem_cache_free(inotify_inode_mark_cachep, i_mark); } const struct fsnotify_ops inotify_fsnotify_ops = { .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, .free_mark = inotify_free_mark, }; |
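/*
 * Minimal userspace counterpart (not part of this file): a reader of the
 * events that inotify_handle_inode_event() queues.  Because inotify_merge()
 * above discards an event identical to the last one still queued, a rapid
 * burst of identical modifications may surface here as a single record.
 * The watched path "/tmp" is only an example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	int fd = inotify_init1(IN_CLOEXEC);

	if (fd < 0 ||
	    inotify_add_watch(fd, "/tmp", IN_CREATE | IN_MODIFY | IN_DELETE) < 0) {
		perror("inotify");
		return EXIT_FAILURE;
	}

	for (;;) {
		ssize_t len = read(fd, buf, sizeof(buf));	/* blocks until events arrive */
		char *p = buf;

		if (len <= 0)
			break;
		while (p < buf + len) {
			const struct inotify_event *ev = (const struct inotify_event *)p;

			printf("wd=%d mask=0x%x name=%s\n",
			       ev->wd, ev->mask, ev->len ? ev->name : "");
			p += sizeof(*ev) + ev->len;
		}
	}
	return 0;
}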
2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 | // SPDX-License-Identifier: GPL-2.0 /* * Written for linux by Johan Myreen as a translation from * the assembly version by Linus (with diacriticals added) * * Some additional features added by Christoph Niemann (ChN), March 1993 * * Loadable keymaps by Risto Kankkunen, May 1993 * * Diacriticals redone & other small changes, aeb@cwi.nl, June 1993 * Added decr/incr_console, dynamic keymaps, Unicode support, * dynamic function/string keys, led setting, Sept 1994 * `Sticky' modifier keys, 951006. * * 11-11-96: SAK should now work in the raw mode (Martin Mares) * * Modified to provide 'generic' keyboard support by Hamish Macdonald * Merge with the m68k keyboard driver and split-off of the PC low-level * parts by Geert Uytterhoeven, May 1997 * * 27-05-97: Added support for the Magic SysRq Key (Martin Mares) * 30-07-98: Dead keys redone, aeb@cwi.nl. * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/consolemap.h> #include <linux/init.h> #include <linux/input.h> #include <linux/jiffies.h> #include <linux/kbd_diacr.h> #include <linux/kbd_kern.h> #include <linux/leds.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/tty_flip.h> #include <linux/tty.h> #include <linux/uaccess.h> #include <linux/vt_kern.h> #include <asm/irq_regs.h> /* * Exported functions/variables */ #define KBD_DEFMODE (BIT(VC_REPEAT) | BIT(VC_META)) #if defined(CONFIG_X86) || defined(CONFIG_PARISC) #include <asm/kbdleds.h> #else static inline int kbd_defleds(void) { return 0; } #endif #define KBD_DEFLOCK 0 /* * Handler Tables. */ #define K_HANDLERS\ k_self, k_fn, k_spec, k_pad,\ k_dead, k_cons, k_cur, k_shift,\ k_meta, k_ascii, k_lock, k_lowercase,\ k_slock, k_dead2, k_brl, k_ignore typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value, char up_flag); static k_handler_fn K_HANDLERS; static k_handler_fn *k_handler[16] = { K_HANDLERS }; #define FN_HANDLERS\ fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\ fn_show_state, fn_send_intr, fn_lastcons, fn_caps_toggle,\ fn_num, fn_hold, fn_scroll_forw, fn_scroll_back,\ fn_boot_it, fn_caps_on, fn_compose, fn_SAK,\ fn_dec_console, fn_inc_console, fn_spawn_con, fn_bare_num typedef void (fn_handler_fn)(struct vc_data *vc); static fn_handler_fn FN_HANDLERS; static fn_handler_fn *fn_handler[] = { FN_HANDLERS }; /* * Variables exported for vt_ioctl.c */ struct vt_spawn_console vt_spawn_con = { .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock), .pid = NULL, .sig = 0, }; /* * Internal Data. 
*/ static struct kbd_struct kbd_table[MAX_NR_CONSOLES]; static struct kbd_struct *kbd = kbd_table; /* maximum values each key_handler can handle */ static const unsigned char max_vals[] = { [ KT_LATIN ] = 255, [ KT_FN ] = ARRAY_SIZE(func_table) - 1, [ KT_SPEC ] = ARRAY_SIZE(fn_handler) - 1, [ KT_PAD ] = NR_PAD - 1, [ KT_DEAD ] = NR_DEAD - 1, [ KT_CONS ] = 255, [ KT_CUR ] = 3, [ KT_SHIFT ] = NR_SHIFT - 1, [ KT_META ] = 255, [ KT_ASCII ] = NR_ASCII - 1, [ KT_LOCK ] = NR_LOCK - 1, [ KT_LETTER ] = 255, [ KT_SLOCK ] = NR_LOCK - 1, [ KT_DEAD2 ] = 255, [ KT_BRL ] = NR_BRL - 1, }; static const int NR_TYPES = ARRAY_SIZE(max_vals); static void kbd_bh(struct tasklet_struct *unused); static DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh); static struct input_handler kbd_handler; static DEFINE_SPINLOCK(kbd_event_lock); static DEFINE_SPINLOCK(led_lock); static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ static DECLARE_BITMAP(key_down, KEY_CNT); /* keyboard key bitmap */ static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ static bool dead_key_next; /* Handles a number being assembled on the number pad */ static bool npadch_active; static unsigned int npadch_value; static unsigned int diacr; static bool rep; /* flag telling character repeat */ static int shift_state = 0; static unsigned int ledstate = -1U; /* undefined */ static unsigned char ledioctl; static bool vt_switch; /* * Notifier list for console keyboard events */ static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list); int register_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_keyboard_notifier); int unregister_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); /* * Translation of scancodes to keycodes. We set them on only the first * keyboard in the list that accepts the scancode and keycode. * Explanation for not choosing the first attached keyboard anymore: * USB keyboards for example have two event devices: one for all "normal" * keys and one for extra function keys (like "volume up", "make coffee", * etc.). So this means that scancodes for the extra function keys won't * be valid for the first event device, but will be for the second. 
*/ struct getset_keycode_data { struct input_keymap_entry ke; int error; }; static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } static int getkeycode(unsigned int scancode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = 0, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } static int setkeycode(unsigned int scancode, unsigned int keycode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = keycode, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); return d.error; } /* * Making beeps and bells. Note that we prefer beeps to bells, but when * shutting the sound off we do both. */ static int kd_sound_helper(struct input_handle *handle, void *data) { unsigned int *hz = data; struct input_dev *dev = handle->dev; if (test_bit(EV_SND, dev->evbit)) { if (test_bit(SND_TONE, dev->sndbit)) { input_inject_event(handle, EV_SND, SND_TONE, *hz); if (*hz) return 0; } if (test_bit(SND_BELL, dev->sndbit)) input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0); } return 0; } static void kd_nosound(struct timer_list *unused) { static unsigned int zero; input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper); } static DEFINE_TIMER(kd_mksound_timer, kd_nosound); void kd_mksound(unsigned int hz, unsigned int ticks) { del_timer_sync(&kd_mksound_timer); input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper); if (hz && ticks) mod_timer(&kd_mksound_timer, jiffies + ticks); } EXPORT_SYMBOL(kd_mksound); /* * Setting the keyboard rate. */ static int kbd_rate_helper(struct input_handle *handle, void *data) { struct input_dev *dev = handle->dev; struct kbd_repeat *rpt = data; if (test_bit(EV_REP, dev->evbit)) { if (rpt[0].delay > 0) input_inject_event(handle, EV_REP, REP_DELAY, rpt[0].delay); if (rpt[0].period > 0) input_inject_event(handle, EV_REP, REP_PERIOD, rpt[0].period); rpt[1].delay = dev->rep[REP_DELAY]; rpt[1].period = dev->rep[REP_PERIOD]; } return 0; } int kbd_rate(struct kbd_repeat *rpt) { struct kbd_repeat data[2] = { *rpt }; input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper); *rpt = data[1]; /* Copy currently used settings */ return 0; } /* * Helper Functions. */ static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, const char *cp) { tty_insert_flip_string(&vc->port, cp, strlen(cp)); tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) { static char buf[] = { 0x1b, 'O', 0x00, 0x00 }; buf[1] = (mode ? 'O' : '['); buf[2] = key; puts_queue(vc, buf); } /* * Many other routines do put_queue, but I think either * they produce ASCII, or they produce some user-assigned * string, and in both cases we might assume that it is * in utf-8 already. 
*/ static void to_utf8(struct vc_data *vc, uint c) { if (c < 0x80) /* 0******* */ put_queue(vc, c); else if (c < 0x800) { /* 110***** 10****** */ put_queue(vc, 0xc0 | (c >> 6)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x10000) { if (c >= 0xD800 && c < 0xE000) return; if (c == 0xFFFF) return; /* 1110**** 10****** 10****** */ put_queue(vc, 0xe0 | (c >> 12)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x110000) { /* 11110*** 10****** 10****** 10****** */ put_queue(vc, 0xf0 | (c >> 18)); put_queue(vc, 0x80 | ((c >> 12) & 0x3f)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } } /* FIXME: review locking for vt.c callers */ static void set_leds(void) { tasklet_schedule(&keyboard_tasklet); } /* * Called after returning from RAW mode or when changing consoles - recompute * shift_down[] and shift_state from key_down[] maybe called when keymap is * undefined, so that shiftkey release is seen. The caller must hold the * kbd_event_lock. */ static void do_compute_shiftstate(void) { unsigned int k, sym, val; shift_state = 0; memset(shift_down, 0, sizeof(shift_down)); for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) { sym = U(key_maps[0][k]); if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK) continue; val = KVAL(sym); if (val == KVAL(K_CAPSSHIFT)) val = KVAL(K_SHIFT); shift_down[val]++; shift_state |= BIT(val); } } /* We still have to export this method to vt.c */ void vt_set_leds_compute_shiftstate(void) { unsigned long flags; /* * When VT is switched, the keyboard led needs to be set once. * Ensure that after the switch is completed, the state of the * keyboard LED is consistent with the state of the keyboard lock. */ vt_switch = true; set_leds(); spin_lock_irqsave(&kbd_event_lock, flags); do_compute_shiftstate(); spin_unlock_irqrestore(&kbd_event_lock, flags); } /* * We have a combining character DIACR here, followed by the character CH. * If the combination occurs in the table, return the corresponding value. * Otherwise, if CH is a space or equals DIACR, return DIACR. * Otherwise, conclude that DIACR was not combining after all, * queue it and return CH. */ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) { unsigned int d = diacr; unsigned int i; diacr = 0; if ((d & ~0xff) == BRL_UC_ROW) { if ((ch & ~0xff) == BRL_UC_ROW) return d | ch; } else { for (i = 0; i < accent_table_size; i++) if (accent_table[i].diacr == d && accent_table[i].base == ch) return accent_table[i].result; } if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d) return d; if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, d); else { int c = conv_uni_to_8bit(d); if (c != -1) put_queue(vc, c); } return ch; } /* * Special function handlers */ static void fn_enter(struct vc_data *vc) { if (diacr) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, diacr); else { int c = conv_uni_to_8bit(diacr); if (c != -1) put_queue(vc, c); } diacr = 0; } put_queue(vc, '\r'); if (vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void fn_caps_toggle(struct vc_data *vc) { if (rep) return; chg_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_caps_on(struct vc_data *vc) { if (rep) return; set_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_show_ptregs(struct vc_data *vc) { struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); } static void fn_hold(struct vc_data *vc) { struct tty_struct *tty = vc->port.tty; if (rep || !tty) return; /* * Note: SCROLLOCK will be set (cleared) by stop_tty (start_tty); * these routines are also activated by ^S/^Q. 
* (And SCROLLOCK can also be set by the ioctl KDSKBLED.) */ if (tty->flow.stopped) start_tty(tty); else stop_tty(tty); } static void fn_num(struct vc_data *vc) { if (vc_kbd_mode(kbd, VC_APPLIC)) applkey(vc, 'P', 1); else fn_bare_num(vc); } /* * Bind this to Shift-NumLock if you work in application keypad mode * but want to be able to change the NumLock flag. * Bind this to NumLock if you prefer that the NumLock key always * changes the NumLock flag. */ static void fn_bare_num(struct vc_data *vc) { if (!rep) chg_vc_kbd_led(kbd, VC_NUMLOCK); } static void fn_lastcons(struct vc_data *vc) { /* switch to the last used console, ChN */ set_console(last_console); } static void fn_dec_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur - 1; i != cur; i--) { if (i == -1) i = MAX_NR_CONSOLES - 1; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_inc_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur+1; i != cur; i++) { if (i == MAX_NR_CONSOLES) i = 0; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) { scrollfront(vc, 0); } static void fn_scroll_back(struct vc_data *vc) { scrollback(vc); } static void fn_show_mem(struct vc_data *vc) { show_mem(); } static void fn_show_state(struct vc_data *vc) { show_state(); } static void fn_boot_it(struct vc_data *vc) { ctrl_alt_del(); } static void fn_compose(struct vc_data *vc) { dead_key_next = true; } static void fn_spawn_con(struct vc_data *vc) { spin_lock(&vt_spawn_con.lock); if (vt_spawn_con.pid) if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) { put_pid(vt_spawn_con.pid); vt_spawn_con.pid = NULL; } spin_unlock(&vt_spawn_con.lock); } static void fn_SAK(struct vc_data *vc) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); } static void fn_null(struct vc_data *vc) { do_compute_shiftstate(); } /* * Special key handlers */ static void k_ignore(struct vc_data *vc, unsigned char value, char up_flag) { } static void k_spec(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (value >= ARRAY_SIZE(fn_handler)) return; if ((kbd->kbdmode == VC_RAW || kbd->kbdmode == VC_MEDIUMRAW || kbd->kbdmode == VC_OFF) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ fn_handler[value](vc); } static void k_lowercase(struct vc_data *vc, unsigned char value, char up_flag) { pr_err("k_lowercase was called - impossible\n"); } static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; /* no action, if this is a key release */ if (diacr) value = handle_diacr(vc, value); if (dead_key_next) { dead_key_next = false; diacr = value; return; } if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, value); else { int c = conv_uni_to_8bit(value); if (c != -1) put_queue(vc, c); } } /* * Handle dead key. Note that we now may have several * dead keys modifying the same character. Very useful * for Vietnamese. */ static void k_deadunicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; diacr = (diacr ? 
handle_diacr(vc, value) : value); } static void k_self(struct vc_data *vc, unsigned char value, char up_flag) { k_unicode(vc, conv_8bit_to_uni(value), up_flag); } static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag) { k_deadunicode(vc, value, up_flag); } /* * Obsolete - for backwards compatibility only */ static void k_dead(struct vc_data *vc, unsigned char value, char up_flag) { static const unsigned char ret_diacr[NR_DEAD] = { '`', /* dead_grave */ '\'', /* dead_acute */ '^', /* dead_circumflex */ '~', /* dead_tilda */ '"', /* dead_diaeresis */ ',', /* dead_cedilla */ '_', /* dead_macron */ 'U', /* dead_breve */ '.', /* dead_abovedot */ '*', /* dead_abovering */ '=', /* dead_doubleacute */ 'c', /* dead_caron */ 'k', /* dead_ogonek */ 'i', /* dead_iota */ '#', /* dead_voiced_sound */ 'o', /* dead_semivoiced_sound */ '!', /* dead_belowdot */ '?', /* dead_hook */ '+', /* dead_horn */ '-', /* dead_stroke */ ')', /* dead_abovecomma */ '(', /* dead_abovereversedcomma */ ':', /* dead_doublegrave */ 'n', /* dead_invertedbreve */ ';', /* dead_belowcomma */ '$', /* dead_currency */ '@', /* dead_greek */ }; k_deadunicode(vc, ret_diacr[value], up_flag); } static void k_cons(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; set_console(value); } static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { unsigned long flags; spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); spin_unlock_irqrestore(&func_buf_lock, flags); } else pr_err("k_fn called with value=%d\n", value); } static void k_cur(struct vc_data *vc, unsigned char value, char up_flag) { static const char cur_chars[] = "BDCA"; if (up_flag) return; applkey(vc, cur_chars[value], vc_kbd_mode(kbd, VC_CKMODE)); } static void k_pad(struct vc_data *vc, unsigned char value, char up_flag) { static const char pad_chars[] = "0123456789+-*/\015,.?()#"; static const char app_map[] = "pqrstuvwxylSRQMnnmPQS"; if (up_flag) return; /* no action, if this is a key release */ /* kludge... 
shift forces cursor/number keys */ if (vc_kbd_mode(kbd, VC_APPLIC) && !shift_down[KG_SHIFT]) { applkey(vc, app_map[value], 1); return; } if (!vc_kbd_led(kbd, VC_NUMLOCK)) { switch (value) { case KVAL(K_PCOMMA): case KVAL(K_PDOT): k_fn(vc, KVAL(K_REMOVE), 0); return; case KVAL(K_P0): k_fn(vc, KVAL(K_INSERT), 0); return; case KVAL(K_P1): k_fn(vc, KVAL(K_SELECT), 0); return; case KVAL(K_P2): k_cur(vc, KVAL(K_DOWN), 0); return; case KVAL(K_P3): k_fn(vc, KVAL(K_PGDN), 0); return; case KVAL(K_P4): k_cur(vc, KVAL(K_LEFT), 0); return; case KVAL(K_P6): k_cur(vc, KVAL(K_RIGHT), 0); return; case KVAL(K_P7): k_fn(vc, KVAL(K_FIND), 0); return; case KVAL(K_P8): k_cur(vc, KVAL(K_UP), 0); return; case KVAL(K_P9): k_fn(vc, KVAL(K_PGUP), 0); return; case KVAL(K_P5): applkey(vc, 'G', vc_kbd_mode(kbd, VC_APPLIC)); return; } } put_queue(vc, pad_chars[value]); if (value == KVAL(K_PENTER) && vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void k_shift(struct vc_data *vc, unsigned char value, char up_flag) { int old_state = shift_state; if (rep) return; /* * Mimic typewriter: * a CapsShift key acts like Shift but undoes CapsLock */ if (value == KVAL(K_CAPSSHIFT)) { value = KVAL(K_SHIFT); if (!up_flag) clr_vc_kbd_led(kbd, VC_CAPSLOCK); } if (up_flag) { /* * handle the case that two shift or control * keys are depressed simultaneously */ if (shift_down[value]) shift_down[value]--; } else shift_down[value]++; if (shift_down[value]) shift_state |= BIT(value); else shift_state &= ~BIT(value); /* kludge */ if (up_flag && shift_state != old_state && npadch_active) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, npadch_value); else put_queue(vc, npadch_value & 0xff); npadch_active = false; } } static void k_meta(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (vc_kbd_mode(kbd, VC_META)) { put_queue(vc, '\033'); put_queue(vc, value); } else put_queue(vc, value | BIT(7)); } static void k_ascii(struct vc_data *vc, unsigned char value, char up_flag) { unsigned int base; if (up_flag) return; if (value < 10) { /* decimal input of code, while Alt depressed */ base = 10; } else { /* hexadecimal input of code, while AltGr depressed */ value -= 10; base = 16; } if (!npadch_active) { npadch_value = 0; npadch_active = true; } npadch_value = npadch_value * base + value; } static void k_lock(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag || rep) return; chg_vc_kbd_lock(kbd, value); } static void k_slock(struct vc_data *vc, unsigned char value, char up_flag) { k_shift(vc, value, up_flag); if (up_flag || rep) return; chg_vc_kbd_slock(kbd, value); /* try to make Alt, oops, AltGr and such work */ if (!key_maps[kbd->lockstate ^ kbd->slockstate]) { kbd->slockstate = 0; chg_vc_kbd_slock(kbd, value); } } /* by default, 300ms interval for combination release */ static unsigned brl_timeout = 300; MODULE_PARM_DESC(brl_timeout, "Braille keys release delay in ms (0 for commit on first key release)"); module_param(brl_timeout, uint, 0644); static unsigned brl_nbchords = 1; MODULE_PARM_DESC(brl_nbchords, "Number of chords that produce a braille pattern (0 for dead chords)"); module_param(brl_nbchords, uint, 0644); static void k_brlcommit(struct vc_data *vc, unsigned int pattern, char up_flag) { static unsigned long chords; static unsigned committed; if (!brl_nbchords) k_deadunicode(vc, BRL_UC_ROW | pattern, up_flag); else { committed |= pattern; chords++; if (chords == brl_nbchords) { k_unicode(vc, BRL_UC_ROW | committed, up_flag); chords = 0; committed = 0; } } } static void k_brl(struct 
vc_data *vc, unsigned char value, char up_flag) { static unsigned pressed, committing; static unsigned long releasestart; if (kbd->kbdmode != VC_UNICODE) { if (!up_flag) pr_warn("keyboard mode must be unicode for braille patterns\n"); return; } if (!value) { k_unicode(vc, BRL_UC_ROW, up_flag); return; } if (value > 8) return; if (!up_flag) { pressed |= BIT(value - 1); if (!brl_timeout) committing = pressed; } else if (brl_timeout) { if (!committing || time_after(jiffies, releasestart + msecs_to_jiffies(brl_timeout))) { committing = pressed; releasestart = jiffies; } pressed &= ~BIT(value - 1); if (!pressed && committing) { k_brlcommit(vc, committing, 0); committing = 0; } } else { if (committing) { k_brlcommit(vc, committing, 0); committing = 0; } pressed &= ~BIT(value - 1); } } #if IS_ENABLED(CONFIG_INPUT_LEDS) && IS_ENABLED(CONFIG_LEDS_TRIGGERS) struct kbd_led_trigger { struct led_trigger trigger; unsigned int mask; }; static int kbd_led_trigger_activate(struct led_classdev *cdev) { struct kbd_led_trigger *trigger = container_of(cdev->trigger, struct kbd_led_trigger, trigger); tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) led_set_brightness(cdev, ledstate & trigger->mask ? LED_FULL : LED_OFF); tasklet_enable(&keyboard_tasklet); return 0; } #define KBD_LED_TRIGGER(_led_bit, _name) { \ .trigger = { \ .name = _name, \ .activate = kbd_led_trigger_activate, \ }, \ .mask = BIT(_led_bit), \ } #define KBD_LOCKSTATE_TRIGGER(_led_bit, _name) \ KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"), KBD_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"), KBD_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"), }; static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { struct kbd_led_trigger *trigger; unsigned int changed = old_state ^ new_state; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { trigger = &kbd_led_triggers[i]; if (changed & trigger->mask) led_trigger_event(&trigger->trigger, new_state & trigger->mask ? 
LED_FULL : LED_OFF); } } static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int led_state = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) kbd_propagate_led_state(~led_state, led_state); return 0; } static void kbd_init_leds(void) { int error; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { error = led_trigger_register(&kbd_led_triggers[i].trigger); if (error) pr_err("error %d while registering trigger %s\n", error, kbd_led_triggers[i].trigger.name); } } #else static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int leds = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) { input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & BIT(0))); input_inject_event(handle, EV_LED, LED_NUML, !!(leds & BIT(1))); input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & BIT(2))); input_inject_event(handle, EV_SYN, SYN_REPORT, 0); } return 0; } static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { input_handler_for_each_handle(&kbd_handler, &new_state, kbd_update_leds_helper); } static void kbd_init_leds(void) { } #endif /* * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, * or (ii) whatever pattern of lights people want to show using KDSETLED, * or (iii) specified bits of specified words in kernel memory. */ static unsigned char getledstate(void) { return ledstate & 0xff; } void setledstate(struct kbd_struct *kb, unsigned int led) { unsigned long flags; spin_lock_irqsave(&led_lock, flags); if (!(led & ~7)) { ledioctl = led; kb->ledmode = LED_SHOW_IOCTL; } else kb->ledmode = LED_SHOW_FLAGS; set_leds(); spin_unlock_irqrestore(&led_lock, flags); } static inline unsigned char getleds(void) { struct kbd_struct *kb = kbd_table + fg_console; if (kb->ledmode == LED_SHOW_IOCTL) return ledioctl; return kb->ledflagstate; } /** * vt_get_leds - helper for braille console * @console: console to read * @flag: flag we want to check * * Check the status of a keyboard led flag and report it back */ int vt_get_leds(unsigned int console, int flag) { struct kbd_struct *kb = &kbd_table[console]; int ret; unsigned long flags; spin_lock_irqsave(&led_lock, flags); ret = vc_kbd_led(kb, flag); spin_unlock_irqrestore(&led_lock, flags); return ret; } EXPORT_SYMBOL_GPL(vt_get_leds); /** * vt_set_led_state - set LED state of a console * @console: console to set * @leds: LED bits * * Set the LEDs on a console. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_set_led_state(unsigned int console, int leds) { struct kbd_struct *kb = &kbd_table[console]; setledstate(kb, leds); } /** * vt_kbd_con_start - Keyboard side of console start * @console: console * * Handle console start. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal * * FIXME: We eventually need to hold the kbd lock here to protect * the LED updating. We can't do it yet because fn_hold calls stop_tty * and start_tty under the kbd_event_lock, while normal tty paths * don't hold the lock. We probably need to split out an LED lock * but not during an -rc release! */ void vt_kbd_con_start(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); clr_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /** * vt_kbd_con_stop - Keyboard side of console stop * @console: console * * Handle console stop. 
This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_kbd_con_stop(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); set_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /* * This is the tasklet that updates LED state of LEDs using standard * keyboard triggers. The reason we use tasklet is that we need to * handle the scenario when keyboard handler is not registered yet * but we already getting updates from the VT to update led state. */ static void kbd_bh(struct tasklet_struct *unused) { unsigned int leds; unsigned long flags; spin_lock_irqsave(&led_lock, flags); leds = getleds(); leds |= (unsigned int)kbd->lockstate << 8; spin_unlock_irqrestore(&led_lock, flags); if (vt_switch) { ledstate = ~leds; vt_switch = false; } if (leds != ledstate) { kbd_propagate_led_state(ledstate, leds); ledstate = leds; } } #if defined(CONFIG_X86) || defined(CONFIG_ALPHA) ||\ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) static inline bool kbd_is_hw_raw(const struct input_dev *dev) { if (!test_bit(EV_MSC, dev->evbit) || !test_bit(MSC_RAW, dev->mscbit)) return false; return dev->id.bustype == BUS_I8042 && dev->id.vendor == 0x0001 && dev->id.product == 0x0001; } static const unsigned short x86_keycodes[256] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,118, 86, 87, 88,115,120,119,121,112,123, 92, 284,285,309, 0,312, 91,327,328,329,331,333,335,336,337,338,339, 367,288,302,304,350, 89,334,326,267,126,268,269,125,347,348,349, 360,261,262,263,268,376,100,101,321,316,373,286,289,102,351,355, 103,104,105,275,287,279,258,106,274,107,294,364,358,363,362,361, 291,108,381,281,290,272,292,305,280, 99,112,257,306,359,113,114, 264,117,271,374,379,265,266, 93, 94, 95, 85,259,375,260, 90,116, 377,109,111,277,278,282,283,295,296,297,299,300,301,293,303,307, 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; #ifdef CONFIG_SPARC static int sparc_l1_a_state; extern void sun_do_break(void); #endif static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { int code; switch (keycode) { case KEY_PAUSE: put_queue(vc, 0xe1); put_queue(vc, 0x1d | up_flag); put_queue(vc, 0x45 | up_flag); break; case KEY_HANGEUL: if (!up_flag) put_queue(vc, 0xf2); break; case KEY_HANJA: if (!up_flag) put_queue(vc, 0xf1); break; case KEY_SYSRQ: /* * Real AT keyboards (that's what we're trying * to emulate here) emit 0xe0 0x2a 0xe0 0x37 when * pressing PrtSc/SysRq alone, but simply 0x54 * when pressing Alt+PrtSc/SysRq. 
*/ if (test_bit(KEY_LEFTALT, key_down) || test_bit(KEY_RIGHTALT, key_down)) { put_queue(vc, 0x54 | up_flag); } else { put_queue(vc, 0xe0); put_queue(vc, 0x2a | up_flag); put_queue(vc, 0xe0); put_queue(vc, 0x37 | up_flag); } break; default: if (keycode > 255) return -1; code = x86_keycodes[keycode]; if (!code) return -1; if (code & 0x100) put_queue(vc, 0xe0); put_queue(vc, (code & 0x7f) | up_flag); break; } return 0; } #else static inline bool kbd_is_hw_raw(const struct input_dev *dev) { return false; } static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { if (keycode > 127) return -1; put_queue(vc, keycode | up_flag); return 0; } #endif static void kbd_rawcode(unsigned char data) { struct vc_data *vc = vc_cons[fg_console].d; kbd = &kbd_table[vc->vc_num]; if (kbd->kbdmode == VC_RAW) put_queue(vc, data); } static void kbd_keycode(unsigned int keycode, int down, bool hw_raw) { struct vc_data *vc = vc_cons[fg_console].d; unsigned short keysym, *key_map; unsigned char type; bool raw_mode; struct tty_struct *tty; int shift_final; struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down }; int rc; tty = vc->port.tty; if (tty && (!tty->driver_data)) { /* No driver data? Strange. Okay we fix it then. */ tty->driver_data = vc; } kbd = &kbd_table[vc->vc_num]; #ifdef CONFIG_SPARC if (keycode == KEY_STOP) sparc_l1_a_state = down; #endif rep = (down == 2); raw_mode = (kbd->kbdmode == VC_RAW); if (raw_mode && !hw_raw) if (emulate_raw(vc, keycode, !down << 7)) if (keycode < BTN_MISC && printk_ratelimit()) pr_warn("can't emulate rawmode for keycode %d\n", keycode); #ifdef CONFIG_SPARC if (keycode == KEY_A && sparc_l1_a_state) { sparc_l1_a_state = false; sun_do_break(); } #endif if (kbd->kbdmode == VC_MEDIUMRAW) { /* * This is extended medium raw mode, with keys above 127 * encoded as 0, high 7 bits, low 7 bits, with the 0 bearing * the 'up' flag if needed. 0 is reserved, so this shouldn't * interfere with anything else. The two bytes after 0 will * always have the up flag set not to interfere with older * applications. This allows for 16384 different keycodes, * which should be enough. */ if (keycode < 128) { put_queue(vc, keycode | (!down << 7)); } else { put_queue(vc, !down << 7); put_queue(vc, (keycode >> 7) | BIT(7)); put_queue(vc, keycode | BIT(7)); } raw_mode = true; } assign_bit(keycode, key_down, down); if (rep && (!vc_kbd_mode(kbd, VC_REPEAT) || (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) { /* * Don't repeat a key if the input buffers are not empty and the * characters get aren't echoed locally. This makes key repeat * usable with slow applications and under heavy loads. 
*/ return; } param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate; param.ledstate = kbd->ledflagstate; key_map = key_maps[shift_final]; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, ¶m); if (rc == NOTIFY_STOP || !key_map) { atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, ¶m); do_compute_shiftstate(); kbd->slockstate = 0; return; } if (keycode < NR_KEYS) keysym = key_map[keycode]; else if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8) keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1)); else return; type = KTYP(keysym); if (type < 0xf0) { param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, ¶m); if (rc != NOTIFY_STOP) if (down && !raw_mode) k_unicode(vc, keysym, !down); return; } type -= 0xf0; if (type == KT_LETTER) { type = KT_LATIN; if (vc_kbd_led(kbd, VC_CAPSLOCK)) { key_map = key_maps[shift_final ^ BIT(KG_SHIFT)]; if (key_map) keysym = key_map[keycode]; } } param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, ¶m); if (rc == NOTIFY_STOP) return; if ((raw_mode || kbd->kbdmode == VC_OFF) && type != KT_SPEC && type != KT_SHIFT) return; (*k_handler[type])(vc, keysym & 0xff, !down); param.ledstate = kbd->ledflagstate; atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, ¶m); if (type != KT_SLOCK) kbd->slockstate = 0; } static void kbd_event(struct input_handle *handle, unsigned int event_type, unsigned int event_code, int value) { /* We are called with interrupts disabled, just take the lock */ spin_lock(&kbd_event_lock); if (event_type == EV_MSC && event_code == MSC_RAW && kbd_is_hw_raw(handle->dev)) kbd_rawcode(value); if (event_type == EV_KEY && event_code <= KEY_MAX) kbd_keycode(event_code, value, kbd_is_hw_raw(handle->dev)); spin_unlock(&kbd_event_lock); tasklet_schedule(&keyboard_tasklet); do_poke_blanked_console = 1; schedule_console_callback(); } static bool kbd_match(struct input_handler *handler, struct input_dev *dev) { if (test_bit(EV_SND, dev->evbit)) return true; if (test_bit(EV_KEY, dev->evbit)) { if (find_next_bit(dev->keybit, BTN_MISC, KEY_RESERVED) < BTN_MISC) return true; if (find_next_bit(dev->keybit, KEY_BRL_DOT10 + 1, KEY_BRL_DOT1) <= KEY_BRL_DOT10) return true; } return false; } /* * When a keyboard (or other input device) is found, the kbd_connect * function is called. The function then looks at the device, and if it * likes it, it can open it and get events from it. In this (kbd_connect) * function, we should decide which VT to bind that keyboard to initially. */ static int kbd_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_handle *handle; int error; handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) return -ENOMEM; handle->dev = dev; handle->handler = handler; handle->name = "kbd"; error = input_register_handle(handle); if (error) goto err_free_handle; error = input_open_device(handle); if (error) goto err_unregister_handle; return 0; err_unregister_handle: input_unregister_handle(handle); err_free_handle: kfree(handle); return error; } static void kbd_disconnect(struct input_handle *handle) { input_close_device(handle); input_unregister_handle(handle); kfree(handle); } /* * Start keyboard handler on the new keyboard by refreshing LED state to * match the rest of the system. 
*/ static void kbd_start(struct input_handle *handle) { tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) kbd_update_leds_helper(handle, &ledstate); tasklet_enable(&keyboard_tasklet); } static const struct input_device_id kbd_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_KEY) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_SND) }, }, { }, /* Terminating entry */ }; MODULE_DEVICE_TABLE(input, kbd_ids); static struct input_handler kbd_handler = { .event = kbd_event, .match = kbd_match, .connect = kbd_connect, .disconnect = kbd_disconnect, .start = kbd_start, .name = "kbd", .id_table = kbd_ids, }; int __init kbd_init(void) { int i; int error; for (i = 0; i < MAX_NR_CONSOLES; i++) { kbd_table[i].ledflagstate = kbd_defleds(); kbd_table[i].default_ledflagstate = kbd_defleds(); kbd_table[i].ledmode = LED_SHOW_FLAGS; kbd_table[i].lockstate = KBD_DEFLOCK; kbd_table[i].slockstate = 0; kbd_table[i].modeflags = KBD_DEFMODE; kbd_table[i].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; } kbd_init_leds(); error = input_register_handler(&kbd_handler); if (error) return error; tasklet_enable(&keyboard_tasklet); tasklet_schedule(&keyboard_tasklet); return 0; } /* Ioctl support code */ /** * vt_do_diacrit - diacritical table updates * @cmd: ioctl request * @udp: pointer to user data for ioctl * @perm: permissions check computed by caller * * Update the diacritical tables atomically and safely. Lock them * against simultaneous keypresses */ int vt_do_diacrit(unsigned int cmd, void __user *udp, int perm) { unsigned long flags; int asize; int ret = 0; switch (cmd) { case KDGKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia; int i; dia = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacr), GFP_KERNEL); if (!dia) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; for (i = 0; i < asize; i++) { dia[i].diacr = conv_uni_to_8bit( accent_table[i].diacr); dia[i].base = conv_uni_to_8bit( accent_table[i].base); dia[i].result = conv_uni_to_8bit( accent_table[i].result); } spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacr, dia, asize * sizeof(struct kbdiacr))) ret = -EFAULT; kfree(dia); return ret; } case KDGKBDIACRUC: { struct kbdiacrsuc __user *a = udp; void *buf; buf = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacruc), GFP_KERNEL); if (buf == NULL) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; memcpy(buf, accent_table, asize * sizeof(struct kbdiacruc)); spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacruc, buf, asize*sizeof(struct kbdiacruc))) ret = -EFAULT; kfree(buf); return ret; } case KDSKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia = NULL; unsigned int ct; int i; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { dia = memdup_array_user(a->kbdiacr, ct, sizeof(struct kbdiacr)); if (IS_ERR(dia)) return PTR_ERR(dia); } spin_lock_irqsave(&kbd_event_lock, flags); accent_table_size = ct; for (i = 0; i < ct; i++) { accent_table[i].diacr = conv_8bit_to_uni(dia[i].diacr); accent_table[i].base = conv_8bit_to_uni(dia[i].base); accent_table[i].result = conv_8bit_to_uni(dia[i].result); } 
spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(dia); return 0; } case KDSKBDIACRUC: { struct kbdiacrsuc __user *a = udp; unsigned int ct; void *buf = NULL; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { buf = memdup_array_user(a->kbdiacruc, ct, sizeof(struct kbdiacruc)); if (IS_ERR(buf)) return PTR_ERR(buf); } spin_lock_irqsave(&kbd_event_lock, flags); if (ct) memcpy(accent_table, buf, ct * sizeof(struct kbdiacruc)); accent_table_size = ct; spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(buf); return 0; } } return ret; } /** * vt_do_kdskbmode - set keyboard mode ioctl * @console: the console to use * @arg: the requested mode * * Update the keyboard mode bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmode(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_RAW: kb->kbdmode = VC_RAW; break; case K_MEDIUMRAW: kb->kbdmode = VC_MEDIUMRAW; break; case K_XLATE: kb->kbdmode = VC_XLATE; do_compute_shiftstate(); break; case K_UNICODE: kb->kbdmode = VC_UNICODE; do_compute_shiftstate(); break; case K_OFF: kb->kbdmode = VC_OFF; break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } /** * vt_do_kdskbmeta - set keyboard meta state * @console: the console to use * @arg: the requested meta state * * Update the keyboard meta bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmeta(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_METABIT: clr_vc_kbd_mode(kb, VC_META); break; case K_ESCPREFIX: set_vc_kbd_mode(kb, VC_META); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, int perm) { struct kbkeycode tmp; int kc = 0; if (copy_from_user(&tmp, user_kbkc, sizeof(struct kbkeycode))) return -EFAULT; switch (cmd) { case KDGETKEYCODE: kc = getkeycode(tmp.scancode); if (kc >= 0) kc = put_user(kc, &user_kbkc->keycode); break; case KDSETKEYCODE: if (!perm) return -EPERM; kc = setkeycode(tmp.scancode, tmp.keycode); break; } return kc; } static unsigned short vt_kdgkbent(unsigned char kbdmode, unsigned char idx, unsigned char map) { unsigned short *key_map, val; unsigned long flags; /* Ensure another thread doesn't free it under us */ spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map) { val = U(key_map[idx]); if (kbdmode != VC_UNICODE && KTYP(val) >= NR_TYPES) val = K_HOLE; } else val = idx ? 
K_HOLE : K_NOSUCHMAP; spin_unlock_irqrestore(&kbd_event_lock, flags); return val; } static int vt_kdskbent(unsigned char kbdmode, unsigned char idx, unsigned char map, unsigned short val) { unsigned long flags; unsigned short *key_map, *new_map, oldval; if (!idx && val == K_NOSUCHMAP) { spin_lock_irqsave(&kbd_event_lock, flags); /* deallocate map */ key_map = key_maps[map]; if (map && key_map) { key_maps[map] = NULL; if (key_map[0] == U(K_ALLOCATED)) { kfree(key_map); keymap_count--; } } spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } if (KTYP(val) < NR_TYPES) { if (KVAL(val) > max_vals[KTYP(val)]) return -EINVAL; } else if (kbdmode != VC_UNICODE) return -EINVAL; /* ++Geert: non-PC keyboards may generate keycode zero */ #if !defined(__mc68000__) && !defined(__powerpc__) /* assignment to entry 0 only tests validity of args */ if (!idx) return 0; #endif new_map = kmalloc(sizeof(plain_map), GFP_KERNEL); if (!new_map) return -ENOMEM; spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map == NULL) { int j; if (keymap_count >= MAX_NR_OF_USER_KEYMAPS && !capable(CAP_SYS_RESOURCE)) { spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(new_map); return -EPERM; } key_maps[map] = new_map; key_map = new_map; key_map[0] = U(K_ALLOCATED); for (j = 1; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); keymap_count++; } else kfree(new_map); oldval = U(key_map[idx]); if (val == oldval) goto out; /* Attention Key */ if ((oldval == K_SAK || val == K_SAK) && !capable(CAP_SYS_ADMIN)) { spin_unlock_irqrestore(&kbd_event_lock, flags); return -EPERM; } key_map[idx] = U(val); if (!map && (KTYP(oldval) == KT_SHIFT || KTYP(val) == KT_SHIFT)) do_compute_shiftstate(); out: spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; struct kbentry kbe; if (copy_from_user(&kbe, user_kbe, sizeof(struct kbentry))) return -EFAULT; switch (cmd) { case KDGKBENT: return put_user(vt_kdgkbent(kb->kbdmode, kbe.kb_index, kbe.kb_table), &user_kbe->kb_value); case KDSKBENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; return vt_kdskbent(kb->kbdmode, kbe.kb_index, kbe.kb_table, kbe.kb_value); } return 0; } static char *vt_kdskbsent(char *kbs, unsigned char cur) { static DECLARE_BITMAP(is_kmalloc, MAX_NR_FUNC); char *cur_f = func_table[cur]; if (cur_f && strlen(cur_f) >= strlen(kbs)) { strcpy(cur_f, kbs); return kbs; } func_table[cur] = kbs; return __test_and_set_bit(cur, is_kmalloc) ? cur_f : NULL; } int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { unsigned char kb_func; unsigned long flags; char *kbs; int ret; if (get_user(kb_func, &user_kdgkb->kb_func)) return -EFAULT; kb_func = array_index_nospec(kb_func, MAX_NR_FUNC); switch (cmd) { case KDGKBSENT: { /* size should have been a struct member */ ssize_t len = sizeof(user_kdgkb->kb_string); kbs = kmalloc(len, GFP_KERNEL); if (!kbs) return -ENOMEM; spin_lock_irqsave(&func_buf_lock, flags); len = strscpy(kbs, func_table[kb_func] ? : "", len); spin_unlock_irqrestore(&func_buf_lock, flags); if (len < 0) { ret = -ENOSPC; break; } ret = copy_to_user(user_kdgkb->kb_string, kbs, len + 1) ? 
-EFAULT : 0; break; } case KDSKBSENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; kbs = strndup_user(user_kdgkb->kb_string, sizeof(user_kdgkb->kb_string)); if (IS_ERR(kbs)) return PTR_ERR(kbs); spin_lock_irqsave(&func_buf_lock, flags); kbs = vt_kdskbsent(kbs, kb_func); spin_unlock_irqrestore(&func_buf_lock, flags); ret = 0; break; } kfree(kbs); return ret; } int vt_do_kdskled(unsigned int console, int cmd, unsigned long arg, int perm) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; unsigned char ucval; switch(cmd) { /* the ioctls below read/set the flags usually shown in the leds */ /* don't use them - they will go away without warning */ case KDGKBLED: spin_lock_irqsave(&kbd_event_lock, flags); ucval = kb->ledflagstate | (kb->default_ledflagstate << 4); spin_unlock_irqrestore(&kbd_event_lock, flags); return put_user(ucval, (char __user *)arg); case KDSKBLED: if (!perm) return -EPERM; if (arg & ~0x77) return -EINVAL; spin_lock_irqsave(&led_lock, flags); kb->ledflagstate = (arg & 7); kb->default_ledflagstate = ((arg >> 4) & 7); set_leds(); spin_unlock_irqrestore(&led_lock, flags); return 0; /* the ioctls below only set the lights, not the functions */ /* for those, see KDGKBLED and KDSKBLED above */ case KDGETLED: ucval = getledstate(); return put_user(ucval, (char __user *)arg); case KDSETLED: if (!perm) return -EPERM; setledstate(kb, arg); return 0; } return -ENOIOCTLCMD; } int vt_do_kdgkbmode(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* This is a spot read so needs no locking */ switch (kb->kbdmode) { case VC_RAW: return K_RAW; case VC_MEDIUMRAW: return K_MEDIUMRAW; case VC_UNICODE: return K_UNICODE; case VC_OFF: return K_OFF; default: return K_XLATE; } } /** * vt_do_kdgkbmeta - report meta status * @console: console to report * * Report the meta flag status of this console */ int vt_do_kdgkbmeta(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* Again a spot read so no locking */ return vc_kbd_mode(kb, VC_META) ? K_ESCPREFIX : K_METABIT; } /** * vt_reset_unicode - reset the unicode status * @console: console being reset * * Restore the unicode console state to its default */ void vt_reset_unicode(unsigned int console) { unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); kbd_table[console].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_get_shift_state - shift bit state * * Report the shift bits from the keyboard state. We have to export * this to support some oddities in the vt layer. 
*/ int vt_get_shift_state(void) { /* Don't lock as this is a transient report */ return shift_state; } /** * vt_reset_keyboard - reset keyboard state * @console: console to reset * * Reset the keyboard bits for a console as part of a general console * reset event */ void vt_reset_keyboard(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); set_vc_kbd_mode(kb, VC_REPEAT); clr_vc_kbd_mode(kb, VC_CKMODE); clr_vc_kbd_mode(kb, VC_APPLIC); clr_vc_kbd_mode(kb, VC_CRLF); kb->lockstate = 0; kb->slockstate = 0; spin_lock(&led_lock); kb->ledmode = LED_SHOW_FLAGS; kb->ledflagstate = kb->default_ledflagstate; spin_unlock(&led_lock); /* do not do set_leds here because this causes an endless tasklet loop when the keyboard hasn't been initialized yet */ spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_get_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Report back a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ int vt_get_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; return vc_kbd_mode(kb, bit); } /** * vt_set_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Set a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ void vt_set_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); set_vc_kbd_mode(kb, bit); spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_clr_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Report back a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ void vt_clr_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); clr_vc_kbd_mode(kb, bit); spin_unlock_irqrestore(&kbd_event_lock, flags); } |
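/*
 * Editorial illustration, not part of the kernel sources reproduced here:
 * the VC_MEDIUMRAW branch of kbd_keycode() above encodes keycodes below
 * 128 as a single byte (bit 7 = release flag) and larger keycodes as
 * three bytes -- a 0x00/0x80 marker carrying the release flag, then the
 * high and the low 7 bits of the keycode, each with bit 7 set, giving
 * room for 16384 keycodes. The user-space sketch below decodes such a
 * stream back into (keycode, down) pairs; mr_decode(), mr_event and the
 * sample bytes are made-up names for this example, and the buffer is
 * assumed to have been read from a VT already switched to K_MEDIUMRAW.
 */
#include <stddef.h>
#include <stdio.h>

struct mr_event {
	unsigned int keycode;
	int down;			/* 1 = press/repeat, 0 = release */
};

/* Decode one event starting at buf[*pos]; 0 on success, -1 if more bytes
 * are needed. A byte with no low bits set (0x00/0x80) starts the long
 * form, which is unambiguous because keycode 0 is reserved. */
static int mr_decode(const unsigned char *buf, size_t len, size_t *pos,
		     struct mr_event *ev)
{
	unsigned char b;

	if (*pos >= len)
		return -1;
	b = buf[(*pos)++];
	if (b & 0x7f) {				/* short form: keycode < 128 */
		ev->keycode = b & 0x7f;
		ev->down = !(b & 0x80);
		return 0;
	}
	if (*pos + 2 > len)			/* long form needs two more bytes */
		return -1;
	ev->down = !(b & 0x80);
	ev->keycode = ((buf[*pos] & 0x7f) << 7) | (buf[*pos + 1] & 0x7f);
	*pos += 2;
	return 0;
}

int main(void)
{
	/* Press of keycode 30, then press and release of keycode 240
	 * (long form: 240 = (1 << 7) + 112, hence 0x81 0xf0). */
	const unsigned char stream[] = {
		30, 0x00, 0x81, 0xf0, 0x80, 0x81, 0xf0,
	};
	struct mr_event ev;
	size_t pos = 0;

	while (mr_decode(stream, sizeof(stream), &pos, &ev) == 0)
		printf("keycode %u %s\n", ev.keycode, ev.down ? "down" : "up");
	return 0;
}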
// SPDX-License-Identifier: GPL-2.0-or-later #include <linux/cfm_bridge.h> #include <uapi/linux/cfm_bridge.h> #include "br_private_cfm.h" static struct br_cfm_mep
*br_mep_find(struct net_bridge *br, u32 instance) { struct br_cfm_mep *mep; hlist_for_each_entry(mep, &br->mep_list, head) if (mep->instance == instance) return mep; return NULL; } static struct br_cfm_mep *br_mep_find_ifindex(struct net_bridge *br, u32 ifindex) { struct br_cfm_mep *mep; hlist_for_each_entry_rcu(mep, &br->mep_list, head, lockdep_rtnl_is_held()) if (mep->create.ifindex == ifindex) return mep; return NULL; } static struct br_cfm_peer_mep *br_peer_mep_find(struct br_cfm_mep *mep, u32 mepid) { struct br_cfm_peer_mep *peer_mep; hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head, lockdep_rtnl_is_held()) if (peer_mep->mepid == mepid) return peer_mep; return NULL; } static struct net_bridge_port *br_mep_get_port(struct net_bridge *br, u32 ifindex) { struct net_bridge_port *port; list_for_each_entry(port, &br->port_list, list) if (port->dev->ifindex == ifindex) return port; return NULL; } /* Calculate the CCM interval in us. */ static u32 interval_to_us(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 3300; case BR_CFM_CCM_INTERVAL_10_MS: return 10 * 1000; case BR_CFM_CCM_INTERVAL_100_MS: return 100 * 1000; case BR_CFM_CCM_INTERVAL_1_SEC: return 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_SEC: return 10 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_1_MIN: return 60 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_MIN: return 10 * 60 * 1000 * 1000; } return 0; } /* Convert the interface interval to CCM PDU value. */ static u32 interval_to_pdu(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 1; case BR_CFM_CCM_INTERVAL_10_MS: return 2; case BR_CFM_CCM_INTERVAL_100_MS: return 3; case BR_CFM_CCM_INTERVAL_1_SEC: return 4; case BR_CFM_CCM_INTERVAL_10_SEC: return 5; case BR_CFM_CCM_INTERVAL_1_MIN: return 6; case BR_CFM_CCM_INTERVAL_10_MIN: return 7; } return 0; } /* Convert the CCM PDU value to interval on interface. */ static u32 pdu_to_interval(u32 value) { switch (value) { case 0: return BR_CFM_CCM_INTERVAL_NONE; case 1: return BR_CFM_CCM_INTERVAL_3_3_MS; case 2: return BR_CFM_CCM_INTERVAL_10_MS; case 3: return BR_CFM_CCM_INTERVAL_100_MS; case 4: return BR_CFM_CCM_INTERVAL_1_SEC; case 5: return BR_CFM_CCM_INTERVAL_10_SEC; case 6: return BR_CFM_CCM_INTERVAL_1_MIN; case 7: return BR_CFM_CCM_INTERVAL_10_MIN; } return BR_CFM_CCM_INTERVAL_NONE; } static void ccm_rx_timer_start(struct br_cfm_peer_mep *peer_mep) { u32 interval_us; interval_us = interval_to_us(peer_mep->mep->cc_config.exp_interval); /* Function ccm_rx_dwork must be called with 1/4 * of the configured CC 'expected_interval' * in order to detect CCM defect after 3.25 interval. 
*/ queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork, usecs_to_jiffies(interval_us / 4)); } static void br_cfm_notify(int event, const struct net_bridge_port *port) { u32 filter = RTEXT_FILTER_CFM_STATUS; br_info_notify(event, port->br, NULL, filter); } static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep) { memset(&peer_mep->cc_status, 0, sizeof(peer_mep->cc_status)); peer_mep->ccm_rx_count_miss = 0; ccm_rx_timer_start(peer_mep); } static void cc_peer_disable(struct br_cfm_peer_mep *peer_mep) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); } static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep, const struct br_cfm_cc_ccm_tx_info *const tx_info) { struct br_cfm_common_hdr *common_hdr; struct net_bridge_port *b_port; struct br_cfm_maid *maid; u8 *itu_reserved, *e_tlv; struct ethhdr *eth_hdr; struct sk_buff *skb; __be32 *status_tlv; __be32 *snumber; __be16 *mepid; skb = dev_alloc_skb(CFM_CCM_MAX_FRAME_LENGTH); if (!skb) return NULL; rcu_read_lock(); b_port = rcu_dereference(mep->b_port); if (!b_port) { kfree_skb(skb); rcu_read_unlock(); return NULL; } skb->dev = b_port->dev; rcu_read_unlock(); /* The device cannot be deleted until the work_queue functions has * completed. This function is called from ccm_tx_work_expired() * that is a work_queue functions. */ skb->protocol = htons(ETH_P_CFM); skb->priority = CFM_FRAME_PRIO; /* Ethernet header */ eth_hdr = skb_put(skb, sizeof(*eth_hdr)); ether_addr_copy(eth_hdr->h_dest, tx_info->dmac.addr); ether_addr_copy(eth_hdr->h_source, mep->config.unicast_mac.addr); eth_hdr->h_proto = htons(ETH_P_CFM); /* Common CFM Header */ common_hdr = skb_put(skb, sizeof(*common_hdr)); common_hdr->mdlevel_version = mep->config.mdlevel << 5; common_hdr->opcode = BR_CFM_OPCODE_CCM; common_hdr->flags = (mep->rdi << 7) | interval_to_pdu(mep->cc_config.exp_interval); common_hdr->tlv_offset = CFM_CCM_TLV_OFFSET; /* Sequence number */ snumber = skb_put(skb, sizeof(*snumber)); if (tx_info->seq_no_update) { *snumber = cpu_to_be32(mep->ccm_tx_snumber); mep->ccm_tx_snumber += 1; } else { *snumber = 0; } mepid = skb_put(skb, sizeof(*mepid)); *mepid = cpu_to_be16((u16)mep->config.mepid); maid = skb_put(skb, sizeof(*maid)); memcpy(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data)); /* ITU reserved (CFM_CCM_ITU_RESERVED_SIZE octets) */ itu_reserved = skb_put(skb, CFM_CCM_ITU_RESERVED_SIZE); memset(itu_reserved, 0, CFM_CCM_ITU_RESERVED_SIZE); /* Generel CFM TLV format: * TLV type: one byte * TLV value length: two bytes * TLV value: 'TLV value length' bytes */ /* Port status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->port_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_PORT_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->port_tlv_value & 0xFF)); } /* Interface status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->if_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_IF_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->if_tlv_value & 0xFF)); } /* End TLV */ e_tlv = skb_put(skb, sizeof(*e_tlv)); *e_tlv = CFM_ENDE_TLV_TYPE; return skb; } static void ccm_frame_tx(struct sk_buff *skb) { skb_reset_network_header(skb); dev_queue_xmit(skb); } /* This function is called with the configured CC 'expected_interval' * in order to drive CCM transmission when enabled. 
*/ static void ccm_tx_work_expired(struct work_struct *work) { struct delayed_work *del_work; struct br_cfm_mep *mep; struct sk_buff *skb; u32 interval_us; del_work = to_delayed_work(work); mep = container_of(del_work, struct br_cfm_mep, ccm_tx_dwork); if (time_before_eq(mep->ccm_tx_end, jiffies)) { /* Transmission period has ended */ mep->cc_ccm_tx_info.period = 0; return; } skb = ccm_frame_build(mep, &mep->cc_ccm_tx_info); if (skb) ccm_frame_tx(skb); interval_us = interval_to_us(mep->cc_config.exp_interval); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, usecs_to_jiffies(interval_us)); } /* This function is called with 1/4 of the configured CC 'expected_interval' * in order to detect CCM defect after 3.25 interval. */ static void ccm_rx_work_expired(struct work_struct *work) { struct br_cfm_peer_mep *peer_mep; struct net_bridge_port *b_port; struct delayed_work *del_work; del_work = to_delayed_work(work); peer_mep = container_of(del_work, struct br_cfm_peer_mep, ccm_rx_dwork); /* After 13 counts (4 * 3,25) then 3.25 intervals are expired */ if (peer_mep->ccm_rx_count_miss < 13) { /* 3.25 intervals are NOT expired without CCM reception */ peer_mep->ccm_rx_count_miss++; /* Start timer again */ ccm_rx_timer_start(peer_mep); } else { /* 3.25 intervals are expired without CCM reception. * CCM defect detected */ peer_mep->cc_status.ccm_defect = true; /* Change in CCM defect status - notify */ rcu_read_lock(); b_port = rcu_dereference(peer_mep->mep->b_port); if (b_port) br_cfm_notify(RTM_NEWLINK, b_port); rcu_read_unlock(); } } static u32 ccm_tlv_extract(struct sk_buff *skb, u32 index, struct br_cfm_peer_mep *peer_mep) { __be32 *s_tlv; __be32 _s_tlv; u32 h_s_tlv; u8 *e_tlv; u8 _e_tlv; e_tlv = skb_header_pointer(skb, index, sizeof(_e_tlv), &_e_tlv); if (!e_tlv) return 0; /* TLV is present - get the status TLV */ s_tlv = skb_header_pointer(skb, index, sizeof(_s_tlv), &_s_tlv); if (!s_tlv) return 0; h_s_tlv = ntohl(*s_tlv); if ((h_s_tlv >> 24) == CFM_IF_STATUS_TLV_TYPE) { /* Interface status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.if_tlv_value = (h_s_tlv & 0xFF); } if ((h_s_tlv >> 24) == CFM_PORT_STATUS_TLV_TYPE) { /* Port status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.port_tlv_value = (h_s_tlv & 0xFF); } /* The Sender ID TLV is not handled */ /* The Organization-Specific TLV is not handled */ /* Return the length of this tlv. 
* This is the length of the value field plus 3 bytes for size of type * field and length field */ return ((h_s_tlv >> 8) & 0xFFFF) + 3; } /* note: already called with rcu_read_lock */ static int br_cfm_frame_rx(struct net_bridge_port *port, struct sk_buff *skb) { u32 mdlevel, interval, size, index, max; const struct br_cfm_common_hdr *hdr; struct br_cfm_peer_mep *peer_mep; const struct br_cfm_maid *maid; struct br_cfm_common_hdr _hdr; struct br_cfm_maid _maid; struct br_cfm_mep *mep; struct net_bridge *br; __be32 *snumber; __be32 _snumber; __be16 *mepid; __be16 _mepid; if (port->state == BR_STATE_DISABLED) return 0; hdr = skb_header_pointer(skb, 0, sizeof(_hdr), &_hdr); if (!hdr) return 1; br = port->br; mep = br_mep_find_ifindex(br, port->dev->ifindex); if (unlikely(!mep)) /* No MEP on this port - must be forwarded */ return 0; mdlevel = hdr->mdlevel_version >> 5; if (mdlevel > mep->config.mdlevel) /* The level is above this MEP level - must be forwarded */ return 0; if ((hdr->mdlevel_version & 0x1F) != 0) { /* Invalid version */ mep->status.version_unexp_seen = true; return 1; } if (mdlevel < mep->config.mdlevel) { /* The level is below this MEP level */ mep->status.rx_level_low_seen = true; return 1; } if (hdr->opcode == BR_CFM_OPCODE_CCM) { /* CCM PDU received. */ /* MA ID is after common header + sequence number + MEP ID */ maid = skb_header_pointer(skb, CFM_CCM_PDU_MAID_OFFSET, sizeof(_maid), &_maid); if (!maid) return 1; if (memcmp(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data))) /* MA ID not as expected */ return 1; /* MEP ID is after common header + sequence number */ mepid = skb_header_pointer(skb, CFM_CCM_PDU_MEPID_OFFSET, sizeof(_mepid), &_mepid); if (!mepid) return 1; peer_mep = br_peer_mep_find(mep, (u32)ntohs(*mepid)); if (!peer_mep) return 1; /* Interval is in common header flags */ interval = hdr->flags & 0x07; if (mep->cc_config.exp_interval != pdu_to_interval(interval)) /* Interval not as expected */ return 1; /* A valid CCM frame is received */ if (peer_mep->cc_status.ccm_defect) { peer_mep->cc_status.ccm_defect = false; /* Change in CCM defect status - notify */ br_cfm_notify(RTM_NEWLINK, port); /* Start CCM RX timer */ ccm_rx_timer_start(peer_mep); } peer_mep->cc_status.seen = true; peer_mep->ccm_rx_count_miss = 0; /* RDI is in common header flags */ peer_mep->cc_status.rdi = (hdr->flags & 0x80) ? 
true : false; /* Sequence number is after common header */ snumber = skb_header_pointer(skb, CFM_CCM_PDU_SEQNR_OFFSET, sizeof(_snumber), &_snumber); if (!snumber) return 1; if (ntohl(*snumber) != (mep->ccm_rx_snumber + 1)) /* Unexpected sequence number */ peer_mep->cc_status.seq_unexp_seen = true; mep->ccm_rx_snumber = ntohl(*snumber); /* TLV end is after common header + sequence number + MEP ID + * MA ID + ITU reserved */ index = CFM_CCM_PDU_TLV_OFFSET; max = 0; do { /* Handle all TLVs */ size = ccm_tlv_extract(skb, index, peer_mep); index += size; max += 1; } while (size != 0 && max < 4); /* Max four TLVs possible */ return 1; } mep->status.opcode_unexp_seen = true; return 1; } static struct br_frame_type cfm_frame_type __read_mostly = { .type = cpu_to_be16(ETH_P_CFM), .frame_handler = br_cfm_frame_rx, }; int br_cfm_mep_create(struct net_bridge *br, const u32 instance, struct br_cfm_mep_create *const create, struct netlink_ext_ack *extack) { struct net_bridge_port *p; struct br_cfm_mep *mep; ASSERT_RTNL(); if (create->domain == BR_CFM_VLAN) { NL_SET_ERR_MSG_MOD(extack, "VLAN domain not supported"); return -EINVAL; } if (create->domain != BR_CFM_PORT) { NL_SET_ERR_MSG_MOD(extack, "Invalid domain value"); return -EINVAL; } if (create->direction == BR_CFM_MEP_DIRECTION_UP) { NL_SET_ERR_MSG_MOD(extack, "Up-MEP not supported"); return -EINVAL; } if (create->direction != BR_CFM_MEP_DIRECTION_DOWN) { NL_SET_ERR_MSG_MOD(extack, "Invalid direction value"); return -EINVAL; } p = br_mep_get_port(br, create->ifindex); if (!p) { NL_SET_ERR_MSG_MOD(extack, "Port is not related to bridge"); return -EINVAL; } mep = br_mep_find(br, instance); if (mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance already exists"); return -EEXIST; } /* In PORT domain only one instance can be created per port */ if (create->domain == BR_CFM_PORT) { mep = br_mep_find_ifindex(br, create->ifindex); if (mep) { NL_SET_ERR_MSG_MOD(extack, "Only one Port MEP on a port allowed"); return -EINVAL; } } mep = kzalloc(sizeof(*mep), GFP_KERNEL); if (!mep) return -ENOMEM; mep->create = *create; mep->instance = instance; rcu_assign_pointer(mep->b_port, p); INIT_HLIST_HEAD(&mep->peer_mep_list); INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired); if (hlist_empty(&br->mep_list)) br_add_frame(br, &cfm_frame_type); hlist_add_tail_rcu(&mep->head, &br->mep_list); return 0; } static void mep_delete_implementation(struct net_bridge *br, struct br_cfm_mep *mep) { struct br_cfm_peer_mep *peer_mep; struct hlist_node *n_store; ASSERT_RTNL(); /* Empty and free peer MEP list */ hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); } cancel_delayed_work_sync(&mep->ccm_tx_dwork); RCU_INIT_POINTER(mep->b_port, NULL); hlist_del_rcu(&mep->head); kfree_rcu(mep, rcu); if (hlist_empty(&br->mep_list)) br_del_frame(br, &cfm_frame_type); } int br_cfm_mep_delete(struct net_bridge *br, const u32 instance, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep_delete_implementation(br, mep); return 0; } int br_cfm_mep_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_mep_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); 
return -ENOENT; } mep->config = *config; return 0; } int br_cfm_cc_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } /* Check for no change in configuration */ if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0) return 0; if (config->enable && !mep->cc_config.enable) /* CC is enabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_enable(peer_mep); if (!config->enable && mep->cc_config.enable) /* CC is disabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_disable(peer_mep); mep->cc_config = *config; mep->ccm_rx_snumber = 0; mep->ccm_tx_snumber = 1; return 0; } int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID already exists"); return -EEXIST; } peer_mep = kzalloc(sizeof(*peer_mep), GFP_KERNEL); if (!peer_mep) return -ENOMEM; peer_mep->mepid = mepid; peer_mep->mep = mep; INIT_DELAYED_WORK(&peer_mep->ccm_rx_dwork, ccm_rx_work_expired); if (mep->cc_config.enable) cc_peer_enable(peer_mep); hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list); return 0; } int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (!peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID does not exists"); return -ENOENT; } cc_peer_disable(peer_mep); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); return 0; } int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance, const bool rdi, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep->rdi = rdi; return 0; } int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_ccm_tx_info *const tx_info, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } if (memcmp(tx_info, &mep->cc_ccm_tx_info, sizeof(*tx_info)) == 0) { /* No change in tx_info. 
*/ if (mep->cc_ccm_tx_info.period == 0) /* Transmission is not enabled - just return */ return 0; /* Transmission is ongoing, the end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); return 0; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period == 0) /* Some change in info and transmission is not ongoing */ goto save; if (tx_info->period != 0 && mep->cc_ccm_tx_info.period != 0) { /* Some change in info and transmission is ongoing * The end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); goto save; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period != 0) { cancel_delayed_work_sync(&mep->ccm_tx_dwork); goto save; } /* Start delayed work to transmit CCM frames. It is done with zero delay * to send first frame immediately */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0); save: mep->cc_ccm_tx_info = *tx_info; return 0; } int br_cfm_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) *count += 1; rcu_read_unlock(); return 0; } int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) *count += 1; rcu_read_unlock(); return 0; } bool br_cfm_created(struct net_bridge *br) { return !hlist_empty(&br->mep_list); } /* Deletes the CFM instances on a specific bridge port */ void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port) { struct hlist_node *n_store; struct br_cfm_mep *mep; ASSERT_RTNL(); hlist_for_each_entry_safe(mep, n_store, &br->mep_list, head) if (mep->create.ifindex == port->dev->ifindex) mep_delete_implementation(br, mep); } |
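/*
 * Editorial illustration, not part of the bridge sources reproduced here:
 * ccm_rx_work_expired() above polls at one quarter of the configured CC
 * 'expected_interval' and declares a CCM defect once ccm_rx_count_miss
 * reaches 13, i.e. after 13 * interval/4 = 3.25 expected intervals with
 * no CCM received (for a 1 s interval: a 250 ms poll and a 3.25 s defect
 * time). The standalone sketch below reuses the microsecond values from
 * interval_to_us() to print the poll period and worst-case detection time
 * per interval; the enum copy and helper names are local to this example.
 */
#include <stdio.h>

enum ccm_interval {		/* mirrors enum br_cfm_ccm_interval, minus NONE */
	CCM_INTERVAL_3_3_MS,
	CCM_INTERVAL_10_MS,
	CCM_INTERVAL_100_MS,
	CCM_INTERVAL_1_SEC,
	CCM_INTERVAL_10_SEC,
	CCM_INTERVAL_1_MIN,
	CCM_INTERVAL_10_MIN,
};

/* Same microsecond values as interval_to_us() in br_cfm.c */
static unsigned long long interval_us(enum ccm_interval i)
{
	switch (i) {
	case CCM_INTERVAL_3_3_MS:	return 3300;
	case CCM_INTERVAL_10_MS:	return 10 * 1000;
	case CCM_INTERVAL_100_MS:	return 100 * 1000;
	case CCM_INTERVAL_1_SEC:	return 1000 * 1000;
	case CCM_INTERVAL_10_SEC:	return 10 * 1000 * 1000;
	case CCM_INTERVAL_1_MIN:	return 60 * 1000 * 1000;
	case CCM_INTERVAL_10_MIN:	return 10ULL * 60 * 1000 * 1000;
	}
	return 0;
}

int main(void)
{
	static const char *names[] = {
		"3.3ms", "10ms", "100ms", "1s", "10s", "1min", "10min",
	};
	int i;

	for (i = CCM_INTERVAL_3_3_MS; i <= CCM_INTERVAL_10_MIN; i++) {
		unsigned long long us = interval_us(i);

		/* RX poll period is interval/4; defect after 13 missed polls */
		printf("%-6s: poll every %llu us, CCM defect after %llu us\n",
		       names[i], us / 4, 13 * (us / 4));
	}
	return 0;
}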
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _NET_IPV6_H #define _NET_IPV6_H #include <linux/ipv6.h> #include <linux/hardirq.h> #include <linux/jhash.h> #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> #include <net/flow.h> #include <net/flow_dissector.h> #include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> struct ip_tunnel_info; #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 /* * NextHeader field of IPv6 header */ #define NEXTHDR_HOP 0 /* Hop-by-hop option header. */ #define NEXTHDR_IPV4 4 /* IPv4 in IPv6 */ #define NEXTHDR_TCP 6 /* TCP segment. */ #define NEXTHDR_UDP 17 /* UDP message. */ #define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */ #define NEXTHDR_ROUTING 43 /* Routing header. */ #define NEXTHDR_FRAGMENT 44 /* Fragmentation/reassembly header. */ #define NEXTHDR_GRE 47 /* GRE header. */ #define NEXTHDR_ESP 50 /* Encapsulating security payload. */ #define NEXTHDR_AUTH 51 /* Authentication header.
*/ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ #define NEXTHDR_SCTP 132 /* SCTP message. */ #define NEXTHDR_MOBILITY 135 /* Mobility header. */ #define NEXTHDR_MAX 255 #define IPV6_DEFAULT_HOPLIMIT 64 #define IPV6_DEFAULT_MCASTHOPS 1 /* Limits on Hop-by-Hop and Destination options. * * Per RFC8200 there is no limit on the maximum number or lengths of options in * Hop-by-Hop or Destination options other then the packet must fit in an MTU. * We allow configurable limits in order to mitigate potential denial of * service attacks. * * There are three limits that may be set: * - Limit the number of options in a Hop-by-Hop or Destination options * extension header * - Limit the byte length of a Hop-by-Hop or Destination options extension * header * - Disallow unknown options * * The limits are expressed in corresponding sysctls: * * ipv6.sysctl.max_dst_opts_cnt * ipv6.sysctl.max_hbh_opts_cnt * ipv6.sysctl.max_dst_opts_len * ipv6.sysctl.max_hbh_opts_len * * max_*_opts_cnt is the number of TLVs that are allowed for Destination * options or Hop-by-Hop options. If the number is less than zero then unknown * TLVs are disallowed and the number of known options that are allowed is the * absolute value. Setting the value to INT_MAX indicates no limit. * * max_*_opts_len is the length limit in bytes of a Destination or * Hop-by-Hop options extension header. Setting the value to INT_MAX * indicates no length limit. * * If a limit is exceeded when processing an extension header the packet is * silently discarded. */ /* Default limits for Hop-by-Hop and Destination options */ #define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 #define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ /* * Addr type * * type - unicast | multicast * scope - local | site | global * v4 - compat * v4mapped * any * loopback */ #define IPV6_ADDR_ANY 0x0000U #define IPV6_ADDR_UNICAST 0x0001U #define IPV6_ADDR_MULTICAST 0x0002U #define IPV6_ADDR_LOOPBACK 0x0010U #define IPV6_ADDR_LINKLOCAL 0x0020U #define IPV6_ADDR_SITELOCAL 0x0040U #define IPV6_ADDR_COMPATv4 0x0080U #define IPV6_ADDR_SCOPE_MASK 0x00f0U #define IPV6_ADDR_MAPPED 0x1000U /* * Addr scopes */ #define IPV6_ADDR_MC_SCOPE(a) \ ((a)->s6_addr[1] & 0x0f) /* nonstandard */ #define __IPV6_ADDR_SCOPE_INVALID -1 #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 #define IPV6_ADDR_SCOPE_GLOBAL 0x0e /* * Addr flags */ #define IPV6_ADDR_MC_FLAG_TRANSIENT(a) \ ((a)->s6_addr[1] & 0x10) #define IPV6_ADDR_MC_FLAG_PREFIX(a) \ ((a)->s6_addr[1] & 0x20) #define IPV6_ADDR_MC_FLAG_RENDEZVOUS(a) \ ((a)->s6_addr[1] & 0x40) /* * fragmentation header */ struct frag_hdr { __u8 nexthdr; __u8 reserved; __be16 frag_off; __be32 identification; }; /* * Jumbo payload option, as described in RFC 2675 2. 
*/ struct hop_jumbo_hdr { u8 nexthdr; u8 hdrlen; u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ u8 tlv_len; /* 4 */ __be32 jumbo_payload_len; }; #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 struct ip6_fraglist_iter { struct ipv6hdr *tmp_hdr; struct sk_buff *frag; int offset; unsigned int hlen; __be32 frag_id; u8 nexthdr; }; int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_fraglist_iter *iter); void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter); static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter) { struct sk_buff *skb = iter->frag; iter->frag = skb->next; skb_mark_not_on_list(skb); return skb; } struct ip6_frag_state { u8 *prevhdr; unsigned int hlen; unsigned int mtu; unsigned int left; int offset; int ptr; int hroom; int troom; __be32 frag_id; u8 nexthdr; }; void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state); struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state); #define IP6_REPLY_MARK(net, mark) \ ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) #include <net/sock.h> /* sysctls */ extern int sysctl_mld_max_msf; extern int sysctl_mld_qrv; #define _DEVINC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\ mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\ }) /* per device counters are atomic_long_t */ #define _DEVINCATOMIC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\ }) /* per device and per net counters are atomic_long_t */ #define _DEVINC_ATOMIC_ATOMIC(net, statname, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ SNMP_INC_STATS_ATOMIC_LONG((net)->mib.statname##_statistics, (field));\ }) #define _DEVADD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \ mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\ }) #define _DEVUPD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \ mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\ }) /* MIBs */ #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, , idev, field) #define __IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, __, idev, field) #define IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, , idev, field, val) #define __IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, __, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, , idev, field, val) #define __IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, __, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) #define __ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, __, idev, field) #define ICMP6MSGOUT_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field 
+256) #define ICMP6MSGIN_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field) struct ip6_ra_chain { struct ip6_ra_chain *next; struct sock *sk; int sel; void (*destructor)(struct sock *); }; extern struct ip6_ra_chain *ip6_ra_chain; extern rwlock_t ip6_ra_lock; /* This structure is prepared by protocol, when parsing ancillary data and passed to IPv6. */ struct ipv6_txoptions { refcount_t refcnt; /* Length of this structure */ int tot_len; /* length of extension headers */ __u16 opt_flen; /* after fragment hdr */ __u16 opt_nflen; /* before fragment hdr */ struct ipv6_opt_hdr *hopopt; struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; /* flowlabel_reflect sysctl values */ enum flowlabel_reflect { FLOWLABEL_REFLECT_ESTABLISHED = 1, FLOWLABEL_REFLECT_TCP_RESET = 2, FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES = 4, }; struct ip6_flowlabel { struct ip6_flowlabel __rcu *next; __be32 label; atomic_t users; struct in6_addr dst; struct ipv6_txoptions *opt; unsigned long linger; struct rcu_head rcu; u8 share; union { struct pid *pid; kuid_t uid; } owner; unsigned long lastuse; unsigned long expires; struct net *fl_net; }; #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) #define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 struct ipv6_fl_socklist { struct ipv6_fl_socklist __rcu *next; struct ip6_flowlabel *fl; struct rcu_head rcu; }; struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, .tclass = -1, .dontfrag = -1, }; } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, .tclass = inet6_sk(sk)->tclass, .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) { struct ipv6_txoptions *opt; rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { if (!refcount_inc_not_zero(&opt->refcnt)) opt = NULL; else opt = rcu_pointer_handoff(opt); } rcu_read_unlock(); return opt; } static inline void txopt_put(struct ipv6_txoptions *opt) { if (opt && refcount_dec_and_test(&opt->refcnt)) kfree_rcu(opt, rcu); } #if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? 
: ERR_PTR(-ENOENT); return NULL; } #endif struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, struct ipv6_txoptions *fopt); void fl6_free_socklist(struct sock *sk); int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen); int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { if (fl) atomic_dec(&fl->users); } enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len); int ip6_ra_control(struct sock *sk, int sel); int ipv6_parse_hopopts(struct sk_buff *skb); struct ipv6_txoptions *ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); static inline struct ipv6_txoptions * ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) { if (!opt) return NULL; return __ipv6_fixup_options(opt_space, opt); } bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); /* This helper is specialized for BIG TCP needs. * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. * It assumes headers are already in skb->head. * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. */ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) { const struct hop_jumbo_hdr *jhdr; const struct ipv6hdr *nhdr; if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) return 0; if (skb->protocol != htons(ETH_P_IPV6)) return 0; if (skb_network_offset(skb) + sizeof(struct ipv6hdr) + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) return 0; nhdr = ipv6_hdr(skb); if (nhdr->nexthdr != NEXTHDR_HOP) return 0; jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || jhdr->nexthdr != IPPROTO_TCP) return 0; return jhdr->nexthdr; } /* Return 0 if HBH header is successfully removed * Or if HBH removal is unnecessary (packet is not big TCP) * Return error to indicate dropping the packet */ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) { const int hophdr_len = sizeof(struct hop_jumbo_hdr); int nexthdr = ipv6_has_hopopt_jumbo(skb); struct ipv6hdr *h6; if (!nexthdr) return 0; if (skb_cow_head(skb, 0)) return -1; /* Remove the HBH header. * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] */ memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), skb_network_header(skb) - skb_mac_header(skb) + sizeof(struct ipv6hdr)); __skb_pull(skb, hophdr_len); skb->network_header += hophdr_len; skb->mac_header += hophdr_len; h6 = ipv6_hdr(skb); h6->nexthdr = nexthdr; return 0; } static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ return READ_ONCE(idev->cnf.forwarding) ? 
accept_ra == 2 : accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ int __ipv6_addr_type(const struct in6_addr *addr); static inline int ipv6_addr_type(const struct in6_addr *addr) { return __ipv6_addr_type(addr) & 0xffff; } static inline int ipv6_addr_scope(const struct in6_addr *addr) { return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; } static inline int __ipv6_addr_src_scope(int type) { return (type == IPV6_ADDR_ANY) ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) { return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } static inline bool __ipv6_addr_needs_scope_id(int type) { return type & IPV6_ADDR_LINKLOCAL || (type & IPV6_ADDR_MULTICAST && (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); } static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) { return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? iface : 0; } static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); } static inline bool ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ulm = (const unsigned long *)m; const unsigned long *ul2 = (const unsigned long *)a2; return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | ((ul1[1] ^ ul2[1]) & ulm[1])); #else return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); #endif } static inline void ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) { /* caller must guarantee 0 <= plen <= 128 */ int o = plen >> 3, b = plen & 0x7; memset(pfx->s6_addr, 0, sizeof(pfx->s6_addr)); memcpy(pfx->s6_addr, addr, o); if (b != 0) pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b); } static inline void ipv6_addr_prefix_copy(struct in6_addr *addr, const struct in6_addr *pfx, int plen) { /* caller must guarantee 0 <= plen <= 128 */ int o = plen >> 3, b = plen & 0x7; memcpy(addr->s6_addr, pfx, o); if (b != 0) { addr->s6_addr[o] &= ~(0xff00 >> b); addr->s6_addr[o] |= (pfx->s6_addr[o] & (0xff00 >> b)); } } static inline void __ipv6_addr_set_half(__be32 *addr, __be32 wh, __be32 wl) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #if defined(__BIG_ENDIAN) if (__builtin_constant_p(wh) && __builtin_constant_p(wl)) { *(__force u64 *)addr = ((__force u64)(wh) << 32 | (__force u64)(wl)); return; } #elif defined(__LITTLE_ENDIAN) if (__builtin_constant_p(wl) && __builtin_constant_p(wh)) { *(__force u64 *)addr = ((__force u64)(wl) << 32 | (__force u64)(wh)); return; } #endif #endif addr[0] = wh; addr[1] = wl; } static inline void ipv6_addr_set(struct in6_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { __ipv6_addr_set_half(&addr->s6_addr32[0], w1, w2); __ipv6_addr_set_half(&addr->s6_addr32[2], w3, w4); } static inline bool ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned 
long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; #endif } #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 static inline bool __ipv6_prefix_equal64_half(const __be64 *a1, const __be64 *a2, unsigned int len) { if (len && ((*a1 ^ *a2) & cpu_to_be64((~0UL) << (64 - len)))) return false; return true; } static inline bool ipv6_prefix_equal(const struct in6_addr *addr1, const struct in6_addr *addr2, unsigned int prefixlen) { const __be64 *a1 = (const __be64 *)addr1; const __be64 *a2 = (const __be64 *)addr2; if (prefixlen >= 64) { if (a1[0] ^ a2[0]) return false; return __ipv6_prefix_equal64_half(a1 + 1, a2 + 1, prefixlen - 64); } return __ipv6_prefix_equal64_half(a1, a2, prefixlen); } #else static inline bool ipv6_prefix_equal(const struct in6_addr *addr1, const struct in6_addr *addr2, unsigned int prefixlen) { const __be32 *a1 = addr1->s6_addr32; const __be32 *a2 = addr2->s6_addr32; unsigned int pdw, pbi; /* check complete u32 in prefix */ pdw = prefixlen >> 5; if (pdw && memcmp(a1, a2, pdw << 2)) return false; /* check incomplete u32 in prefix */ pbi = prefixlen & 0x1f; if (pbi && ((a1[pdw] ^ a2[pdw]) & htonl((0xffffffff) << (32 - pbi)))) return false; return true; } #endif static inline bool ipv6_addr_any(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul = (const unsigned long *)a; return (ul[0] | ul[1]) == 0UL; #else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | a->s6_addr32[3]) == 0; #endif } static inline u32 ipv6_addr_hash(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul = (const unsigned long *)a; unsigned long x = ul[0] ^ ul[1]; return (u32)(x ^ (x >> 32)); #else return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ a->s6_addr32[2] ^ a->s6_addr32[3]); #endif } /* more secured version of ipv6_addr_hash() */ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { return jhash2((__force const u32 *)a->s6_addr32, ARRAY_SIZE(a->s6_addr32), initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const __be64 *be = (const __be64 *)a; return (be[0] | (be[1] ^ cpu_to_be64(1))) == 0UL; #else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | (a->s6_addr32[3] ^ cpu_to_be32(1))) == 0; #endif } /* * Note that we must __force cast these to unsigned long to make sparse happy, * since all of the endian-annotated types are fixed size regardless of arch. 
*/ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) { return ( #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 *(unsigned long *)a | #else (__force unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | #endif (__force unsigned long)(a->s6_addr32[2] ^ cpu_to_be32(0x0000ffff))) == 0UL; } static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) { return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); } static inline u32 ipv6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) { unsigned int hash, mix = net_hash_mix(net); if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) */ static inline bool ipv6_addr_orchid(const struct in6_addr *a) { return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr) { return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, struct in6_addr *v4mapped) { ipv6_addr_set(v4mapped, 0, 0, htonl(0x0000FFFF), addr); } /* * find the first different bit between two addresses * length of address must be a multiple of 32bits */ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int addrlen) { const __be32 *a1 = token1, *a2 = token2; int i; addrlen >>= 2; for (i = 0; i < addrlen; i++) { __be32 xb = a1[i] ^ a2[i]; if (xb) return i * 32 + 31 - __fls(ntohl(xb)); } /* * we should *never* get to this point since that * would mean the addrs are equal * * However, we do get to it 8) And exactly, when * addresses are equal 8) * * ip route add 1111::/128 via ... * ip route add 1111::/64 via ... * and we are here. * * Ideally, this function should stop comparison * at prefix length. It does not, but it is still OK, * if returned value is greater than prefix length. 
* --ANK (980803) */ return addrlen << 5; } #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 static inline int __ipv6_addr_diff64(const void *token1, const void *token2, int addrlen) { const __be64 *a1 = token1, *a2 = token2; int i; addrlen >>= 3; for (i = 0; i < addrlen; i++) { __be64 xb = a1[i] ^ a2[i]; if (xb) return i * 64 + 63 - __fls(be64_to_cpu(xb)); } return addrlen << 6; } #endif static inline int __ipv6_addr_diff(const void *token1, const void *token2, int addrlen) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 if (__builtin_constant_p(addrlen) && !(addrlen & 7)) return __ipv6_addr_diff64(token1, token2, addrlen); #endif return __ipv6_addr_diff32(token1, token2, addrlen); } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) { return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, struct dst_entry *dst) { int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) hlimit = READ_ONCE(np->mcast_hops); else hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; } /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store * Equivalent to : flow->v6addrs.src = iph->saddr; * flow->v6addrs.dst = iph->daddr; */ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, const struct ipv6hdr *iph) { BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } #if IS_ENABLED(CONFIG_IPV6) static inline bool ipv6_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return net->ipv6.sysctl.ip_nonlocal_bind || test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } /* Sysctl settings for net ipv6.auto_flowlabels */ #define IP6_AUTO_FLOW_LABEL_OFF 0 #define IP6_AUTO_FLOW_LABEL_OPTOUT 1 #define IP6_AUTO_FLOW_LABEL_OPTIN 2 #define IP6_AUTO_FLOW_LABEL_FORCED 3 #define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED #define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { u32 hash; /* @flowlabel may include more than a flow label, eg, the traffic class. * Here we want only the flow label value. */ flowlabel &= IPV6_FLOWLABEL_MASK; if (flowlabel || net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || (!autolabel && net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) return flowlabel; hash = skb_get_hash_flowi6(skb, fl6); /* Since this is being sent on the wire obfuscate hash a bit * to minimize possibility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. 
*/ hash = rol32(hash, 16); flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; if (net->ipv6.sysctl.flowlabel_state_ranges) flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } static inline int ip6_default_np_autolabel(struct net *net) { switch (net->ipv6.sysctl.auto_flowlabels) { case IP6_AUTO_FLOW_LABEL_OFF: case IP6_AUTO_FLOW_LABEL_OPTIN: default: return 0; case IP6_AUTO_FLOW_LABEL_OPTOUT: case IP6_AUTO_FLOW_LABEL_FORCED: return 1; } } #else static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { return flowlabel; } static inline int ip6_default_np_autolabel(struct net *net) { return 0; } #endif #if IS_ENABLED(CONFIG_IPV6) static inline int ip6_multipath_hash_policy(const struct net *net) { return net->ipv6.sysctl.multipath_hash_policy; } static inline u32 ip6_multipath_hash_fields(const struct net *net) { return net->ipv6.sysctl.multipath_hash_fields; } #else static inline int ip6_multipath_hash_policy(const struct net *net) { return 0; } static inline u32 ip6_multipath_hash_fields(const struct net *net) { return 0; } #endif /* * Header manipulation */ static inline void ip6_flow_hdr(struct ipv6hdr *hdr, unsigned int tclass, __be32 flowlabel) { *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | flowlabel; } static inline __be32 ip6_flowinfo(const struct ipv6hdr *hdr) { return *(__be32 *)hdr & IPV6_FLOWINFO_MASK; } static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) { return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; } static inline u8 ip6_tclass(__be32 flowinfo) { return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } static inline dscp_t ip6_dscp(__be32 flowinfo) { return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); } static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; } static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6) { return fl6->flowlabel & IPV6_FLOWLABEL_MASK; } /* * Prototypes exported by ipv6 */ /* * rcv function (called from netdevice level) */ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); /* * upper-layer output functions */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags); int ip6_push_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk); int ip6_send_skb(struct sk_buff *skb); struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork, struct inet6_cork *v6_cork); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { return __ip6_make_skb(sk, &sk->sk_write_queue, 
&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); /* * skb processing functions */ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); /* * Extension header (options) processing */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr_p, struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, __be16 *frag_offp); bool ipv6_ext_hdr(u8 nexthdr); enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), IP6_FH_F_SKIP_RH = (1 << 2), }; /* find specified header and get offset to it */ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *fragflg); int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type); struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig); /* * socket options (ipv6_sockglue.c) */ DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned 
long arg); int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); /* * reassembly.c */ extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; extern const struct proto_ops inet6_sockraw_ops; struct group_source_req; struct group_filter; int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list); int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t ss_offset); #ifdef CONFIG_PROC_FS int ac6_proc_init(struct net *net); void ac6_proc_exit(struct net *net); int raw6_proc_init(void); void raw6_proc_exit(void); int tcp6_proc_init(struct net *net); void tcp6_proc_exit(struct net *net); int udp6_proc_init(struct net *net); void udp6_proc_exit(struct net *net); int udplite6_proc_init(void); void udplite6_proc_exit(void); int ipv6_misc_proc_init(void); void ipv6_misc_proc_exit(void); int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); #else static inline int ac6_proc_init(struct net *net) { return 0; } static inline void ac6_proc_exit(struct net *net) { } static inline int snmp6_register_dev(struct inet6_dev *idev) { return 0; } static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; } #endif #ifdef CONFIG_SYSCTL struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); size_t ipv6_icmp_sysctl_table_size(void); struct ctl_table *ipv6_route_sysctl_init(struct net *net); size_t ipv6_route_sysctl_table_size(struct net *net); int ipv6_sysctl_register(void); void ipv6_sysctl_unregister(void); #endif int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); static inline int ip6_sock_set_v6only(struct sock *sk) { if (inet_sk(sk)->inet_num) return -EINVAL; lock_sock(sk); sk->sk_ipv6only = true; release_sock(sk); return 0; } static inline void ip6_sock_set_recverr(struct sock *sk) { inet6_set_bit(RECVERR6, sk); } #define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ IPV6_PREFER_SRC_COA) static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { case IPV6_PREFER_SRC_PUBLIC: pref |= IPV6_PREFER_SRC_PUBLIC; prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case IPV6_PREFER_SRC_TMP: pref |= IPV6_PREFER_SRC_TMP; prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case IPV6_PREFER_SRC_PUBTMP_DEFAULT: prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case 0: break; default: return -EINVAL; } /* check HOME/COA conflicts */ switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) { case IPV6_PREFER_SRC_HOME: prefmask &= ~IPV6_PREFER_SRC_COA; break; case IPV6_PREFER_SRC_COA: pref |= IPV6_PREFER_SRC_COA; break; case 0: break; default: return -EINVAL; } /* check CGA/NONCGA conflicts */ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { case 
IPV6_PREFER_SRC_CGA: case IPV6_PREFER_SRC_NONCGA: case 0: break; default: return -EINVAL; } WRITE_ONCE(inet6_sk(sk)->srcprefs, (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); inet6_sk(sk)->rxopt.bits.rxinfo = true; release_sock(sk); } #define IPV6_ADDR_WORDS 4 static inline void ipv6_addr_cpu_to_be32(__be32 *dst, const u32 *src) { cpu_to_be32_array(dst, src, IPV6_ADDR_WORDS); } static inline void ipv6_addr_be32_to_cpu(u32 *dst, const __be32 *src) { be32_to_cpu_array(dst, src, IPV6_ADDR_WORDS); } #endif /* _NET_IPV6_H */ |
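/*
 * Hedged illustration (not part of the kernel header above): a minimal
 * userspace sketch of the bit-scan done by __ipv6_addr_diff32() earlier in
 * this header -- locate the first bit at which two IPv6 addresses differ,
 * 32 bits at a time. addr_diff_bit() and the use of <arpa/inet.h> are
 * illustrative assumptions; the kernel helper works on struct in6_addr
 * words (__be32) and __fls() instead of __builtin_clz().
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return the index of the first differing bit (0..127), or 128 if equal. */
static int addr_diff_bit(const struct in6_addr *a1, const struct in6_addr *a2)
{
	uint32_t w1[4], w2[4];
	int i;

	memcpy(w1, a1, sizeof(w1));
	memcpy(w2, a2, sizeof(w2));
	for (i = 0; i < 4; i++) {
		uint32_t xb = ntohl(w1[i]) ^ ntohl(w2[i]);

		/* "31 - __fls(xb)" in the kernel == count of leading zeros. */
		if (xb)
			return i * 32 + __builtin_clz(xb);
	}
	return 128;
}

int main(void)
{
	struct in6_addr a, b;

	inet_pton(AF_INET6, "2001:db8::1", &a);
	inet_pton(AF_INET6, "2001:db8:0:1::1", &b);
	/* The addresses share a 63-bit prefix, so this prints 63. */
	printf("first differing bit: %d\n", addr_diff_bit(&a, &b));
	return 0;
}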
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2014 - 2020 Intel Corporation */ #include <crypto/algapi.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/bitops.h> #include <linux/pci.h> #include <linux/cdev.h> #include <linux/uaccess.h> #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_cfg.h" #include "adf_cfg_common.h" #include "adf_cfg_user.h" #define ADF_CFG_MAX_SECTION 512 #define ADF_CFG_MAX_KEY_VAL 256 #define DEVICE_NAME "qat_adf_ctl" static DEFINE_MUTEX(adf_ctl_lock); static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); static const struct file_operations adf_ctl_ops = { .owner = THIS_MODULE, .unlocked_ioctl = adf_ctl_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static const struct class adf_ctl_class = { .name = DEVICE_NAME, }; struct adf_ctl_drv_info { unsigned int major; struct cdev drv_cdev; }; static struct adf_ctl_drv_info adf_ctl_drv; static void adf_chr_drv_destroy(void) { device_destroy(&adf_ctl_class, MKDEV(adf_ctl_drv.major, 0)); cdev_del(&adf_ctl_drv.drv_cdev); class_unregister(&adf_ctl_class); unregister_chrdev_region(MKDEV(adf_ctl_drv.major, 0), 1); } static int adf_chr_drv_create(void) { dev_t dev_id; struct device *drv_device; int ret; if (alloc_chrdev_region(&dev_id, 0, 1, DEVICE_NAME)) { pr_err("QAT: unable to allocate chrdev region\n"); return -EFAULT; } ret = class_register(&adf_ctl_class); if (ret) goto err_chrdev_unreg; adf_ctl_drv.major = MAJOR(dev_id); cdev_init(&adf_ctl_drv.drv_cdev, &adf_ctl_ops); if (cdev_add(&adf_ctl_drv.drv_cdev, dev_id, 1)) { pr_err("QAT: cdev add failed\n"); goto
err_class_destr; } drv_device = device_create(&adf_ctl_class, NULL, MKDEV(adf_ctl_drv.major, 0), NULL, DEVICE_NAME); if (IS_ERR(drv_device)) { pr_err("QAT: failed to create device\n"); goto err_cdev_del; } return 0; err_cdev_del: cdev_del(&adf_ctl_drv.drv_cdev); err_class_destr: class_unregister(&adf_ctl_class); err_chrdev_unreg: unregister_chrdev_region(dev_id, 1); return -EFAULT; } static int adf_ctl_alloc_resources(struct adf_user_cfg_ctl_data **ctl_data, unsigned long arg) { struct adf_user_cfg_ctl_data *cfg_data; cfg_data = kzalloc(sizeof(*cfg_data), GFP_KERNEL); if (!cfg_data) return -ENOMEM; /* Initialize device id to NO DEVICE as 0 is a valid device id */ cfg_data->device_id = ADF_CFG_NO_DEVICE; if (copy_from_user(cfg_data, (void __user *)arg, sizeof(*cfg_data))) { pr_err("QAT: failed to copy from user cfg_data.\n"); kfree(cfg_data); return -EIO; } *ctl_data = cfg_data; return 0; } static int adf_add_key_value_data(struct adf_accel_dev *accel_dev, const char *section, const struct adf_user_cfg_key_val *key_val) { if (key_val->type == ADF_HEX) { long *ptr = (long *)key_val->val; long val = *ptr; if (adf_cfg_add_key_value_param(accel_dev, section, key_val->key, (void *)val, key_val->type)) { dev_err(&GET_DEV(accel_dev), "failed to add hex keyvalue.\n"); return -EFAULT; } } else { if (adf_cfg_add_key_value_param(accel_dev, section, key_val->key, key_val->val, key_val->type)) { dev_err(&GET_DEV(accel_dev), "failed to add keyvalue.\n"); return -EFAULT; } } return 0; } static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev, struct adf_user_cfg_ctl_data *ctl_data) { struct adf_user_cfg_key_val key_val; struct adf_user_cfg_key_val *params_head; struct adf_user_cfg_section section, *section_head; int i, j; section_head = ctl_data->config_section; for (i = 0; section_head && i < ADF_CFG_MAX_SECTION; i++) { if (copy_from_user(§ion, (void __user *)section_head, sizeof(*section_head))) { dev_err(&GET_DEV(accel_dev), "failed to copy section info\n"); goto out_err; } if (adf_cfg_section_add(accel_dev, section.name)) { dev_err(&GET_DEV(accel_dev), "failed to add section.\n"); goto out_err; } params_head = section.params; for (j = 0; params_head && j < ADF_CFG_MAX_KEY_VAL; j++) { if (copy_from_user(&key_val, (void __user *)params_head, sizeof(key_val))) { dev_err(&GET_DEV(accel_dev), "Failed to copy keyvalue.\n"); goto out_err; } if (adf_add_key_value_data(accel_dev, section.name, &key_val)) { goto out_err; } params_head = key_val.next; } section_head = section.next; } return 0; out_err: adf_cfg_del_all(accel_dev); return -EFAULT; } static int adf_ctl_ioctl_dev_config(struct file *fp, unsigned int cmd, unsigned long arg) { int ret; struct adf_user_cfg_ctl_data *ctl_data; struct adf_accel_dev *accel_dev; ret = adf_ctl_alloc_resources(&ctl_data, arg); if (ret) return ret; accel_dev = adf_devmgr_get_dev_by_id(ctl_data->device_id); if (!accel_dev) { ret = -EFAULT; goto out; } if (adf_dev_started(accel_dev)) { ret = -EFAULT; goto out; } if (adf_copy_key_value_data(accel_dev, ctl_data)) { ret = -EFAULT; goto out; } set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); out: kfree(ctl_data); return ret; } static int adf_ctl_is_device_in_use(int id) { struct adf_accel_dev *dev; list_for_each_entry(dev, adf_devmgr_get_head(), list) { if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) { if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) { dev_info(&GET_DEV(dev), "device qat_dev%d is busy\n", dev->accel_id); return -EBUSY; } } } return 0; } static void adf_ctl_stop_devices(u32 id) { struct 
adf_accel_dev *accel_dev; list_for_each_entry(accel_dev, adf_devmgr_get_head(), list) { if (id == accel_dev->accel_id || id == ADF_CFG_ALL_DEVICES) { if (!adf_dev_started(accel_dev)) continue; /* First stop all VFs */ if (!accel_dev->is_vf) continue; adf_dev_down(accel_dev); } } list_for_each_entry(accel_dev, adf_devmgr_get_head(), list) { if (id == accel_dev->accel_id || id == ADF_CFG_ALL_DEVICES) { if (!adf_dev_started(accel_dev)) continue; adf_dev_down(accel_dev); } } } static int adf_ctl_ioctl_dev_stop(struct file *fp, unsigned int cmd, unsigned long arg) { int ret; struct adf_user_cfg_ctl_data *ctl_data; ret = adf_ctl_alloc_resources(&ctl_data, arg); if (ret) return ret; if (adf_devmgr_verify_id(ctl_data->device_id)) { pr_err("QAT: Device %d not found\n", ctl_data->device_id); ret = -ENODEV; goto out; } ret = adf_ctl_is_device_in_use(ctl_data->device_id); if (ret) goto out; if (ctl_data->device_id == ADF_CFG_ALL_DEVICES) pr_info("QAT: Stopping all acceleration devices.\n"); else pr_info("QAT: Stopping acceleration device qat_dev%d.\n", ctl_data->device_id); adf_ctl_stop_devices(ctl_data->device_id); out: kfree(ctl_data); return ret; } static int adf_ctl_ioctl_dev_start(struct file *fp, unsigned int cmd, unsigned long arg) { int ret; struct adf_user_cfg_ctl_data *ctl_data; struct adf_accel_dev *accel_dev; ret = adf_ctl_alloc_resources(&ctl_data, arg); if (ret) return ret; ret = -ENODEV; accel_dev = adf_devmgr_get_dev_by_id(ctl_data->device_id); if (!accel_dev) goto out; dev_info(&GET_DEV(accel_dev), "Starting acceleration device qat_dev%d.\n", ctl_data->device_id); ret = adf_dev_up(accel_dev, false); if (ret) { dev_err(&GET_DEV(accel_dev), "Failed to start qat_dev%d\n", ctl_data->device_id); adf_dev_down(accel_dev); } out: kfree(ctl_data); return ret; } static int adf_ctl_ioctl_get_num_devices(struct file *fp, unsigned int cmd, unsigned long arg) { u32 num_devices = 0; adf_devmgr_get_num_dev(&num_devices); if (copy_to_user((void __user *)arg, &num_devices, sizeof(num_devices))) return -EFAULT; return 0; } static int adf_ctl_ioctl_get_status(struct file *fp, unsigned int cmd, unsigned long arg) { struct adf_hw_device_data *hw_data; struct adf_dev_status_info dev_info; struct adf_accel_dev *accel_dev; if (copy_from_user(&dev_info, (void __user *)arg, sizeof(struct adf_dev_status_info))) { pr_err("QAT: failed to copy from user.\n"); return -EFAULT; } accel_dev = adf_devmgr_get_dev_by_id(dev_info.accel_id); if (!accel_dev) return -ENODEV; hw_data = accel_dev->hw_device; dev_info.state = adf_dev_started(accel_dev) ? 
DEV_UP : DEV_DOWN; dev_info.num_ae = hw_data->get_num_aes(hw_data); dev_info.num_accel = hw_data->get_num_accels(hw_data); dev_info.num_logical_accel = hw_data->num_logical_accel; dev_info.banks_per_accel = hw_data->num_banks / hw_data->num_logical_accel; strscpy(dev_info.name, hw_data->dev_class->name, sizeof(dev_info.name)); dev_info.instance_id = hw_data->instance_id; dev_info.type = hw_data->dev_class->type; dev_info.bus = accel_to_pci_dev(accel_dev)->bus->number; dev_info.dev = PCI_SLOT(accel_to_pci_dev(accel_dev)->devfn); dev_info.fun = PCI_FUNC(accel_to_pci_dev(accel_dev)->devfn); if (copy_to_user((void __user *)arg, &dev_info, sizeof(struct adf_dev_status_info))) { dev_err(&GET_DEV(accel_dev), "failed to copy status.\n"); return -EFAULT; } return 0; } static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { int ret; if (mutex_lock_interruptible(&adf_ctl_lock)) return -EFAULT; switch (cmd) { case IOCTL_CONFIG_SYS_RESOURCE_PARAMETERS: ret = adf_ctl_ioctl_dev_config(fp, cmd, arg); break; case IOCTL_STOP_ACCEL_DEV: ret = adf_ctl_ioctl_dev_stop(fp, cmd, arg); break; case IOCTL_START_ACCEL_DEV: ret = adf_ctl_ioctl_dev_start(fp, cmd, arg); break; case IOCTL_GET_NUM_DEVICES: ret = adf_ctl_ioctl_get_num_devices(fp, cmd, arg); break; case IOCTL_STATUS_ACCEL_DEV: ret = adf_ctl_ioctl_get_status(fp, cmd, arg); break; default: pr_err_ratelimited("QAT: Invalid ioctl %d\n", cmd); ret = -EFAULT; break; } mutex_unlock(&adf_ctl_lock); return ret; } static int __init adf_register_ctl_device_driver(void) { if (adf_chr_drv_create()) goto err_chr_dev; if (adf_init_misc_wq()) goto err_misc_wq; if (adf_init_aer()) goto err_aer; if (adf_init_pf_wq()) goto err_pf_wq; if (adf_init_vf_wq()) goto err_vf_wq; if (qat_crypto_register()) goto err_crypto_register; if (qat_compression_register()) goto err_compression_register; return 0; err_compression_register: qat_crypto_unregister(); err_crypto_register: adf_exit_vf_wq(); err_vf_wq: adf_exit_pf_wq(); err_pf_wq: adf_exit_aer(); err_aer: adf_exit_misc_wq(); err_misc_wq: adf_chr_drv_destroy(); err_chr_dev: mutex_destroy(&adf_ctl_lock); return -EFAULT; } static void __exit adf_unregister_ctl_device_driver(void) { adf_chr_drv_destroy(); adf_exit_misc_wq(); adf_exit_aer(); adf_exit_vf_wq(); adf_exit_pf_wq(); qat_crypto_unregister(); qat_compression_unregister(); adf_clean_vf_map(false); mutex_destroy(&adf_ctl_lock); } module_init(adf_register_ctl_device_driver); module_exit(adf_unregister_ctl_device_driver); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_ALIAS_CRYPTO("intel_qat"); MODULE_VERSION(ADF_DRV_VERSION); MODULE_IMPORT_NS("CRYPTO_INTERNAL"); |
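/*
 * Hedged illustration (not part of the driver above): a minimal userspace
 * sketch of how a management tool might talk to the control node that
 * device_create() registers. The /dev/qat_adf_ctl path follows from
 * DEVICE_NAME, and IOCTL_GET_NUM_DEVICES is the command handled by
 * adf_ctl_ioctl_get_num_devices(), but its numeric encoding lives in the
 * QAT UAPI headers that are not shown here -- the include below is an
 * assumption, not a verified path.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "adf_cfg_user.h"	/* assumed to define IOCTL_GET_NUM_DEVICES */

int main(void)
{
	uint32_t num_devices = 0;
	int fd = open("/dev/qat_adf_ctl", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/qat_adf_ctl");
		return 1;
	}
	/* The handler copies a u32 back through the argument pointer. */
	if (ioctl(fd, IOCTL_GET_NUM_DEVICES, &num_devices) < 0) {
		perror("IOCTL_GET_NUM_DEVICES");
		close(fd);
		return 1;
	}
	printf("QAT acceleration devices: %u\n", num_devices);
	close(fd);
	return 0;
}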
// SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_proto_udp.c: UDP load balancing support for IPVS * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> * Network name space (netns) aware. */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/udp.h> #include <linux/indirect_call_wrapper.h> #include <net/ip_vs.h> #include <net/ip.h> #include <net/ip6_checksum.h> static int udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { struct ip_vs_service *svc; struct udphdr _udph, *uh; __be16 _ports[2], *ports = NULL; if (likely(!ip_vs_iph_icmp(iph))) { /* IPv6 fragments, only first fragment will hit this */ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); if (uh) ports = &uh->source; } else { ports = skb_header_pointer( skb, iph->len, sizeof(_ports), &_ports); } if (!ports) { *verdict = NF_DROP; return 0; } if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); else svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->saddr, ports[0]); if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded.
* We have to drop this packet :( */ *verdict = NF_DROP; return 0; } /* * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; return 0; } } /* NF_ACCEPT */ return 1; } static inline void udp_fast_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); else #endif uhdr->check = csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } static inline void udp_partial_csum_update(int af, struct udphdr *uhdr, const union nf_inet_addr *oldip, const union nf_inet_addr *newip, __be16 oldlen, __be16 newlen) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) uhdr->check = ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); else #endif uhdr->check = ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldlen, newlen, csum_unfold(uhdr->check)))); } INDIRECT_CALLABLE_SCOPE int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Call application helper if needed */ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? 
CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->vaddr.in6, &cp->caddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); } return 1; } static int udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; unsigned int udphoff = iph->len; bool payload_csum = false; int oldlen; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) return 1; #endif oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (skb_ensure_writable(skb, udphoff + sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { int ret; /* Some checks before mangling */ if (!udp_csum_check(cp->af, skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph))) return 0; /* ret=2: csum update is needed after payload mangling */ if (ret == 1) oldlen = skb->len - udphoff; else payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* * Adjust UDP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, htons(oldlen), htons(skb->len - udphoff)); } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) udph->check = csum_ipv6_magic(&cp->caddr.in6, &cp->daddr.in6, skb->len - udphoff, cp->protocol, skb->csum); else #endif udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } static int udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; unsigned int udphoff; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) udphoff = sizeof(struct ipv6hdr); else #endif udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) return 0; if (uh->check != 0) { switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); fallthrough; case CHECKSUM_COMPLETE: #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - udphoff, ipv6_hdr(skb)->nexthdr, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } } else #endif if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - udphoff, ip_hdr(skb)->protocol, skb->csum)) { IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } break; default: /* No need to checksum. 
*/ break; } } return 1; } static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) & UDP_APP_TAB_MASK; } static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: return ret; } static void udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; /* Default binding: bind app only for NAT */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) return 0; /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", __func__, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), inc->name, ntohs(inc->port)); cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); break; } } return result; } static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = "UDP", [IP_VS_UDP_S_LAST] = "BUG!", }; static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) return "ERR!"; return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; } static void udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return; } cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; if (direction == IP_VS_DIR_OUTPUT) ip_vs_control_assure_ct(cp); } static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) return -ENOMEM; return 0; } static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_udp = { .name = "UDP", .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, .init = NULL, .exit = NULL, .init_netns = __udp_init, .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .state_transition = udp_state_transition, .state_name = udp_state_name, .register_app = udp_register_app, .unregister_app = udp_unregister_app, .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, }; |
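/*
 * Editor's sketch (not part of the kernel sources): udp_fast_csum_update()
 * above avoids recomputing the UDP checksum over the whole payload by applying
 * an incremental one's-complement update (RFC 1624) for the address and port
 * words that NAT rewrites. The stand-alone user-space program below
 * illustrates the same arithmetic; the helper names and the example values
 * are invented for illustration only and are not the kernel helpers.
 */
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit accumulator back into 16 bits, wrapping the carries. */
static uint16_t csum_fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* RFC 1624, eqn. 3: HC' = ~(~HC + ~m + m') for one rewritten 16-bit field. */
static uint16_t csum_update16(uint16_t old_check, uint16_t old_field,
			      uint16_t new_field)
{
	uint32_t sum = (uint16_t)~old_check;

	sum += (uint16_t)~old_field;
	sum += new_field;
	return (uint16_t)~csum_fold32(sum);
}

int main(void)
{
	uint16_t check = 0xbeef;	/* hypothetical original checksum */

	/* e.g. a UDP source port rewritten from 0x1234 to 0xabcd */
	check = csum_update16(check, 0x1234, 0xabcd);
	printf("updated checksum: 0x%04x\n", check);
	return 0;
}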
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ipi

#if !defined(_TRACE_IPI_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IPI_H

#include <linux/tracepoint.h>

/**
 * ipi_raise - called when a smp cross call is made
 *
 * @mask: mask of recipient CPUs for the IPI
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string.
 */
TRACE_EVENT(ipi_raise,

	TP_PROTO(const struct cpumask *mask, const char *reason),

	TP_ARGS(mask, reason),

	TP_STRUCT__entry(
		__bitmask(target_cpus, nr_cpumask_bits)
		__field(const char *, reason)
	),

	TP_fast_assign(
		__assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits);
		__entry->reason = reason;
	),

	TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason)
);

TRACE_EVENT(ipi_send_cpu,

	TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback),

	TP_ARGS(cpu, callsite, callback),

	TP_STRUCT__entry(
		__field(unsigned int, cpu)
		__field(void *, callsite)
		__field(void *, callback)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->callsite = (void *)callsite;
		__entry->callback = callback;
	),

	TP_printk("cpu=%u callsite=%pS callback=%pS",
		  __entry->cpu, __entry->callsite, __entry->callback)
);

TRACE_EVENT(ipi_send_cpumask,

	TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback),

	TP_ARGS(cpumask, callsite, callback),

	TP_STRUCT__entry(
		__cpumask(cpumask)
		__field(void *, callsite)
		__field(void *, callback)
	),

	TP_fast_assign(
		__assign_cpumask(cpumask, cpumask_bits(cpumask));
		__entry->callsite = (void *)callsite;
		__entry->callback = callback;
	),

	TP_printk("cpumask=%s callsite=%pS callback=%pS",
		  __get_cpumask(cpumask), __entry->callsite, __entry->callback)
);

DECLARE_EVENT_CLASS(ipi_handler,

	TP_PROTO(const char *reason),

	TP_ARGS(reason),

	TP_STRUCT__entry(
		__field(const char *, reason)
	),

	TP_fast_assign(
		__entry->reason = reason;
	),

	TP_printk("(%s)", __entry->reason)
);

/**
 * ipi_entry - called immediately before the IPI handler
 *
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string, ideally the same as used with trace_ipi_raise
 * for that IPI.
 */
DEFINE_EVENT(ipi_handler, ipi_entry,

	TP_PROTO(const char *reason),

	TP_ARGS(reason)
);

/**
 * ipi_exit - called immediately after the IPI handler returns
 *
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string, ideally the same as used with trace_ipi_raise for
 * that IPI.
 */
DEFINE_EVENT(ipi_handler, ipi_exit,

	TP_PROTO(const char *reason),

	TP_ARGS(reason)
);

#endif /* _TRACE_IPI_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
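/*
 * Editor's sketch (not from the kernel sources): the kernel-doc above requires
 * the @reason argument to be a __tracepoint_string so that only a pointer is
 * recorded in the ring buffer and can later be resolved back to text. A
 * minimal kernel-context caller of trace_ipi_raise() might look like the
 * following; the function name and the reason text are hypothetical, and the
 * snippet only compiles inside the kernel tree.
 */
#include <linux/cpumask.h>
#include <trace/events/ipi.h>

static const char *example_resched_reason =
	__tracepoint_string("Rescheduling interrupts");

/* Hypothetical arch helper: record the IPI before ringing the doorbell. */
static void example_send_resched_ipi(int cpu)
{
	trace_ipi_raise(cpumask_of(cpu), example_resched_reason);
	/* ...architecture-specific IPI trigger would follow here... */
}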
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __LICENSE_H
#define __LICENSE_H

static inline int license_is_gpl_compatible(const char *license)
{
	return (strcmp(license, "GPL") == 0
		|| strcmp(license, "GPL v2") == 0
		|| strcmp(license, "GPL and additional rights") == 0
		|| strcmp(license, "Dual BSD/GPL") == 0
		|| strcmp(license, "Dual MIT/GPL") == 0
		|| strcmp(license, "Dual MPL/GPL") == 0);
}

#endif
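/*
 * Editor's sketch (not part of the kernel sources): license_is_gpl_compatible()
 * is a pure string-compare predicate, so it can be exercised in a tiny
 * user-space harness. The function body is copied verbatim from above; the
 * test harness around it is ours.
 */
#include <assert.h>
#include <string.h>

static inline int license_is_gpl_compatible(const char *license)
{
	return (strcmp(license, "GPL") == 0
		|| strcmp(license, "GPL v2") == 0
		|| strcmp(license, "GPL and additional rights") == 0
		|| strcmp(license, "Dual BSD/GPL") == 0
		|| strcmp(license, "Dual MIT/GPL") == 0
		|| strcmp(license, "Dual MPL/GPL") == 0);
}

int main(void)
{
	assert(license_is_gpl_compatible("GPL v2"));
	assert(license_is_gpl_compatible("Dual MIT/GPL"));
	assert(!license_is_gpl_compatible("Proprietary"));
	return 0;
}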
6 6 6 6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | // SPDX-License-Identifier: GPL-2.0 /* * Interface between ext4 and JBD */ #include "ext4_jbd2.h" #include <trace/events/ext4.h> int ext4_inode_journal_mode(struct inode *inode) { if (EXT4_JOURNAL(inode) == NULL) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ /* We do not support data journalling with delayed allocation */ if (!S_ISREG(inode->i_mode) || ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) || test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && !test_opt(inode->i_sb, DELALLOC))) { /* We do not support data journalling for encrypted data */ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ } if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ BUG(); } /* Just increment the non-pointer handle value */ static handle_t *ext4_get_nojournal(void) { handle_t *handle = current->journal_info; unsigned long ref_cnt = (unsigned long)handle; BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); ref_cnt++; handle = (handle_t *)ref_cnt; current->journal_info = handle; return handle; } /* Decrement the non-pointer handle value */ static void ext4_put_nojournal(handle_t *handle) { unsigned long ref_cnt = (unsigned long)handle; BUG_ON(ref_cnt == 0); ref_cnt--; handle = (handle_t *)ref_cnt; current->journal_info = handle; } /* * Wrappers for jbd2_journal_start/end. */ static int ext4_journal_check_start(struct super_block *sb) { journal_t *journal; might_sleep(); if (unlikely(ext4_forced_shutdown(sb))) return -EIO; if (WARN_ON_ONCE(sb_rdonly(sb))) return -EROFS; WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); journal = EXT4_SB(sb)->s_journal; /* * Special case here: if the journal has aborted behind our * backs (eg. 
EIO in the commit thread), then we still need to * take the FS itself readonly cleanly. */ if (journal && is_journal_aborted(journal)) { ext4_abort(sb, -journal->j_errno, "Detected aborted journal"); return -EROFS; } return 0; } handle_t *__ext4_journal_start_sb(struct inode *inode, struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, int revoke_creds) { journal_t *journal; int err; if (inode) trace_ext4_journal_start_inode(inode, blocks, rsv_blocks, revoke_creds, type, _RET_IP_); else trace_ext4_journal_start_sb(sb, blocks, rsv_blocks, revoke_creds, type, _RET_IP_); err = ext4_journal_check_start(sb); if (err < 0) return ERR_PTR(err); journal = EXT4_SB(sb)->s_journal; if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return ext4_get_nojournal(); return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds, GFP_NOFS, type, line); } int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) { struct super_block *sb; int err; int rc; if (!ext4_handle_valid(handle)) { ext4_put_nojournal(handle); return 0; } err = handle->h_err; if (!handle->h_transaction) { rc = jbd2_journal_stop(handle); return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; rc = jbd2_journal_stop(handle); if (!err) err = rc; if (err) __ext4_std_error(sb, where, line, err); return err; } handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, int type) { struct super_block *sb; int err; if (!ext4_handle_valid(handle)) return ext4_get_nojournal(); sb = handle->h_journal->j_private; trace_ext4_journal_start_reserved(sb, jbd2_handle_buffer_credits(handle), _RET_IP_); err = ext4_journal_check_start(sb); if (err < 0) { jbd2_journal_free_reserved(handle); return ERR_PTR(err); } err = jbd2_journal_start_reserved(handle, type, line); if (err < 0) return ERR_PTR(err); return handle; } int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, int extend_cred, int revoke_cred) { if (!ext4_handle_valid(handle)) return 0; if (is_handle_aborted(handle)) return -EROFS; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; extend_cred = max(0, extend_cred - jbd2_handle_buffer_credits(handle)); revoke_cred = max(0, revoke_cred - handle->h_revoke_credits); return ext4_journal_extend(handle, extend_cred, revoke_cred); } static void ext4_journal_abort_handle(const char *caller, unsigned int line, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err) { char nbuf[16]; const char *errstr = ext4_decode_error(NULL, err, nbuf); BUG_ON(!ext4_handle_valid(handle)); if (bh) BUFFER_TRACE(bh, "abort"); if (!handle->h_err) handle->h_err = err; if (is_handle_aborted(handle)) return; printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", caller, line, errstr, err_fn); jbd2_journal_abort_handle(handle); } static void ext4_check_bdev_write_error(struct super_block *sb) { struct address_space *mapping = sb->s_bdev->bd_mapping; struct ext4_sb_info *sbi = EXT4_SB(sb); int err; /* * If the block device has write error flag, it may have failed to * async write out metadata buffers in the background. In this case, * we could read old data from disk and write it out again, which * may lead to on-disk filesystem inconsistency. 
*/ if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) { spin_lock(&sbi->s_bdev_wb_lock); err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err); spin_unlock(&sbi->s_bdev_wb_lock); if (err) ext4_error_err(sb, -err, "Error while async write back metadata"); } } int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct super_block *sb, struct buffer_head *bh, enum ext4_journal_trigger_type trigger_type) { int err; might_sleep(); if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); return err; } } else ext4_check_bdev_write_error(sb); if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb)) return 0; BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT); jbd2_journal_set_triggers(bh, &EXT4_SB(sb)->s_journal_triggers[trigger_type].tr_triggers); return 0; } /* * The ext4 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. */ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; might_sleep(); trace_ext4_forget(inode, is_metadata, blocknr); BUFFER_TRACE(bh, "enter"); ext4_debug("forgetting bh %p: is_metadata=%d, mode %o, data mode %x\n", bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); /* In the no journal case, we can just do a bforget and return */ if (!ext4_handle_valid(handle)) { bforget(bh); return 0; } /* Never use the revoke function if we are doing full data * journaling: there is no need to, and a V1 superblock won't * support it. Otherwise, only skip the revoke on un-journaled * data blocks. 
*/ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || (!is_metadata && !ext4_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call jbd2_journal_forget"); err = jbd2_journal_forget(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); return err; } return 0; } /* * data!=journal && (is_metadata || should_journal_data(inode)) */ BUFFER_TRACE(bh, "call jbd2_journal_revoke"); err = jbd2_journal_revoke(handle, blocknr, bh); if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); __ext4_error(inode->i_sb, where, line, true, -err, 0, "error %d when attempting revoke", err); } BUFFER_TRACE(bh, "exit"); return err; } int __ext4_journal_get_create_access(const char *where, unsigned int line, handle_t *handle, struct super_block *sb, struct buffer_head *bh, enum ext4_journal_trigger_type trigger_type) { int err; if (!ext4_handle_valid(handle)) return 0; err = jbd2_journal_get_create_access(handle, bh); if (err) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); return err; } if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb)) return 0; BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT); jbd2_journal_set_triggers(bh, &EXT4_SB(sb)->s_journal_triggers[trigger_type].tr_triggers); return 0; } int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh) { int err = 0; might_sleep(); set_buffer_meta(bh); set_buffer_prio(bh); set_buffer_uptodate(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); /* Errors can only happen due to aborted journal or a nasty bug */ if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); if (inode == NULL) { pr_err("EXT4: jbd2_journal_dirty_metadata " "failed: handle type %u started at " "line %u, credits %u/%u, errcode %d", handle->h_type, handle->h_line_no, handle->h_requested_credits, jbd2_handle_buffer_credits(handle), err); return err; } ext4_error_inode(inode, where, line, bh->b_blocknr, "journal_dirty_metadata failed: " "handle type %u started at line %u, " "credits %u/%u, errcode %d", handle->h_type, handle->h_line_no, handle->h_requested_credits, jbd2_handle_buffer_credits(handle), err); } } else { if (inode) mark_buffer_dirty_inode(bh, inode); else mark_buffer_dirty(bh); if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { ext4_error_inode_err(inode, where, line, bh->b_blocknr, EIO, "IO error syncing itable block"); err = -EIO; } } } return err; } |
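/*
 * Editor's sketch (not part of the kernel sources): ext4_get_nojournal() and
 * ext4_put_nojournal() above keep a nesting count for no-journal "handles" by
 * storing a plain integer in the pointer-typed current->journal_info field.
 * The user-space model below reproduces that encoding trick; all names and
 * the maximum-count constant are invented here.
 */
#include <assert.h>
#include <stdio.h>

typedef struct handle handle_t;	/* opaque: only ever cast, never dereferenced */

#define NOJOURNAL_MAX_REF_COUNT	(~0UL >> 1)

static handle_t *journal_info;	/* stands in for current->journal_info */

static handle_t *get_nojournal(void)
{
	unsigned long ref_cnt = (unsigned long)journal_info;

	assert(ref_cnt < NOJOURNAL_MAX_REF_COUNT);
	journal_info = (handle_t *)(ref_cnt + 1);
	return journal_info;
}

static void put_nojournal(void)
{
	unsigned long ref_cnt = (unsigned long)journal_info;

	assert(ref_cnt != 0);
	journal_info = (handle_t *)(ref_cnt - 1);
}

int main(void)
{
	get_nojournal();
	get_nojournal();
	put_nojournal();
	printf("nesting depth now %lu\n", (unsigned long)journal_info);	/* 1 */
	put_nojournal();
	return 0;
}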
1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2009 Red Hat, Inc. * Author: Michael S. Tsirkin <mst@redhat.com> * * virtio-net server in host kernel. */ #include <linux/compat.h> #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/virtio_net.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> #include <linux/vmalloc.h> #include <linux/net.h> #include <linux/if_packet.h> #include <linux/if_arp.h> #include <linux/if_tun.h> #include <linux/if_macvlan.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> #include <linux/skb_array.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/xdp.h> #include "vhost.h" static int experimental_zcopytx = 0; module_param(experimental_zcopytx, int, 0444); MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" " 1 -Enable; 0 - Disable"); /* Max number of bytes transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others with small * pkts. */ #define VHOST_NET_PKT_WEIGHT 256 /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 #define VHOST_GOODCOPY_LEN 256 /* * For transmit, used buffer len is unused; we override it to track buffer * status internally; used for zerocopy tx only. 
*/ /* Lower device DMA failed */ #define VHOST_DMA_FAILED_LEN ((__force __virtio32)3) /* Lower device DMA done */ #define VHOST_DMA_DONE_LEN ((__force __virtio32)2) /* Lower device DMA in progress */ #define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1) /* Buffer unused */ #define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0) #define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_F_RING_RESET) }; enum { VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; enum { VHOST_NET_VQ_RX = 0, VHOST_NET_VQ_TX = 1, VHOST_NET_VQ_MAX = 2, }; struct vhost_net_ubuf_ref { /* refcount follows semantics similar to kref: * 0: object is released * 1: no outstanding ubufs * >1: outstanding ubufs */ atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; }; #define VHOST_NET_BATCH 64 struct vhost_net_buf { void **queue; int tail; int head; }; struct vhost_net_virtqueue { struct vhost_virtqueue vq; size_t vhost_hlen; size_t sock_hlen; /* vhost zerocopy support fields below: */ /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; /* For TX, first used idx for DMA done zerocopy buffers * For RX, number of batched heads */ int done_idx; /* Number of XDP frames batched */ int batched_xdp; /* an array of userspace buffers info */ struct ubuf_info_msgzc *ubuf_info; /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. */ struct vhost_net_ubuf_ref *ubufs; struct ptr_ring *rx_ring; struct vhost_net_buf rxq; /* Batched XDP buffs */ struct xdp_buff *xdp; }; struct vhost_net { struct vhost_dev dev; struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; /* Number of times zerocopy TX recently failed. * Protected by tx vq lock. */ unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. 
*/ bool tx_flush; /* Private page frag cache */ struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; static void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq) { if (rxq->tail != rxq->head) return rxq->queue[rxq->head]; else return NULL; } static int vhost_net_buf_get_size(struct vhost_net_buf *rxq) { return rxq->tail - rxq->head; } static int vhost_net_buf_is_empty(struct vhost_net_buf *rxq) { return rxq->tail == rxq->head; } static void *vhost_net_buf_consume(struct vhost_net_buf *rxq) { void *ret = vhost_net_buf_get_ptr(rxq); ++rxq->head; return ret; } static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; rxq->head = 0; rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_NET_BATCH); return rxq->tail; } static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, vhost_net_buf_get_size(rxq), tun_ptr_free); rxq->head = rxq->tail = 0; } } static int vhost_net_buf_peek_len(void *ptr) { if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); return xdpf->len; } return __skb_array_len_with_tag(ptr); } static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; if (!vhost_net_buf_is_empty(rxq)) goto out; if (!vhost_net_buf_produce(nvq)) return 0; out: return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); } static void vhost_net_buf_init(struct vhost_net_buf *rxq) { rxq->head = rxq->tail = 0; } static void vhost_net_enable_zcopy(int vq) { vhost_net_zcopy_mask |= 0x1 << vq; } static struct vhost_net_ubuf_ref * vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) { struct vhost_net_ubuf_ref *ubufs; /* No zero copy backend? Nothing to count. 
*/ if (!zcopy) return NULL; ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); if (!ubufs) return ERR_PTR(-ENOMEM); atomic_set(&ubufs->refcount, 1); init_waitqueue_head(&ubufs->wait); ubufs->vq = vq; return ubufs; } static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { int r = atomic_sub_return(1, &ubufs->refcount); if (unlikely(!r)) wake_up(&ubufs->wait); return r; } static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put(ubufs); wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); } static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put_and_wait(ubufs); kfree(ubufs); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) { int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { kfree(n->vqs[i].ubuf_info); n->vqs[i].ubuf_info = NULL; } } static int vhost_net_set_ubuf_info(struct vhost_net *n) { bool zcopy; int i; for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { zcopy = vhost_net_zcopy_mask & (0x1 << i); if (!zcopy) continue; n->vqs[i].ubuf_info = kmalloc_array(UIO_MAXIOV, sizeof(*n->vqs[i].ubuf_info), GFP_KERNEL); if (!n->vqs[i].ubuf_info) goto err; } return 0; err: vhost_net_clear_ubuf_info(n); return -ENOMEM; } static void vhost_net_vq_reset(struct vhost_net *n) { int i; vhost_net_clear_ubuf_info(n); for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].done_idx = 0; n->vqs[i].upend_idx = 0; n->vqs[i].ubufs = NULL; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; vhost_net_buf_init(&n->vqs[i].rxq); } } static void vhost_net_tx_packet(struct vhost_net *net) { ++net->tx_packets; if (net->tx_packets < 1024) return; net->tx_packets = 0; net->tx_zcopy_err = 0; } static void vhost_net_tx_err(struct vhost_net *net) { ++net->tx_zcopy_err; } static bool vhost_net_tx_select_zcopy(struct vhost_net *net) { /* TX flush waits for outstanding DMAs to be done. * Don't start new DMAs. */ return !net->tx_flush && net->tx_packets / 64 >= net->tx_zcopy_err; } static bool vhost_sock_zcopy(struct socket *sock) { return unlikely(experimental_zcopytx) && sock_flag(sock->sk, SOCK_ZEROCOPY); } static bool vhost_sock_xdp(struct socket *sock) { return sock_flag(sock->sk, SOCK_XDP); } /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM * guest used idx. */ static void vhost_zerocopy_signal_used(struct vhost_net *net, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); int i, add; int j = 0; for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { vq->heads[i].len = VHOST_DMA_CLEAR_LEN; ++j; } else break; } while (j) { add = min(UIO_MAXIOV - nvq->done_idx, j); vhost_add_used_and_signal_n(vq->dev, vq, &vq->heads[nvq->done_idx], add); nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; j -= add; } } static void vhost_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *ubuf_base, bool success) { struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; struct vhost_virtqueue *vq = ubufs->vq; int cnt; rcu_read_lock_bh(); /* set len to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = success ? 
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; cnt = vhost_net_ubuf_put(ubufs); /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1. * We also trigger polling periodically after each 16 packets * (the value 16 here is more or less arbitrary, it's tuned to trigger * less than 10% of times). */ if (cnt <= 1 || !(cnt % 16)) vhost_poll_queue(&vq->poll); rcu_read_unlock_bh(); } static const struct ubuf_info_ops vhost_ubuf_ops = { .complete = vhost_zerocopy_complete, }; static inline unsigned long busy_clock(void) { return local_clock() >> 10; } static bool vhost_can_busy_poll(unsigned long endtime) { return likely(!need_resched() && !time_after(busy_clock(), endtime) && !signal_pending(current)); } static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); if (!vhost_vq_get_backend(vq)) return; vhost_poll_stop(poll); } static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; sock = vhost_vq_get_backend(vq); if (!sock) return 0; return vhost_poll_start(poll, sock->file); } static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) { struct vhost_virtqueue *vq = &nvq->vq; struct vhost_dev *dev = vq->dev; if (!nvq->done_idx) return; vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); nvq->done_idx = 0; } static void vhost_tx_batch(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct socket *sock, struct msghdr *msghdr) { struct tun_msg_ctl ctl = { .type = TUN_MSG_PTR, .num = nvq->batched_xdp, .ptr = nvq->xdp, }; int i, err; if (nvq->batched_xdp == 0) goto signal_used; msghdr->msg_control = &ctl; msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); /* free pages owned by XDP; since this is an unlikely error path, * keep it simple and avoid more complex bulk update for the * used pages */ for (i = 0; i < nvq->batched_xdp; ++i) put_page(virt_to_head_page(nvq->xdp[i].data)); nvq->batched_xdp = 0; nvq->done_idx = 0; return; } signal_used: vhost_net_signal_used(nvq); nvq->batched_xdp = 0; } static int sock_has_rx_data(struct socket *sock) { if (unlikely(!sock)) return 0; if (sock->ops->peek_len) return sock->ops->peek_len(sock); return skb_queue_empty(&sock->sk->sk_receive_queue); } static void vhost_net_busy_poll_try_queue(struct vhost_net *net, struct vhost_virtqueue *vq) { if (!vhost_vq_avail_empty(&net->dev, vq)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); vhost_poll_queue(&vq->poll); } } static void vhost_net_busy_poll(struct vhost_net *net, struct vhost_virtqueue *rvq, struct vhost_virtqueue *tvq, bool *busyloop_intr, bool poll_rx) { unsigned long busyloop_timeout; unsigned long endtime; struct socket *sock; struct vhost_virtqueue *vq = poll_rx ? tvq : rvq; /* Try to hold the vq mutex of the paired virtqueue. We can't * use mutex_lock() here since we could not guarantee a * consistenet lock ordering. */ if (!mutex_trylock(&vq->mutex)) return; vhost_disable_notify(&net->dev, vq); sock = vhost_vq_get_backend(rvq); busyloop_timeout = poll_rx ? 
rvq->busyloop_timeout: tvq->busyloop_timeout; preempt_disable(); endtime = busy_clock() + busyloop_timeout; while (vhost_can_busy_poll(endtime)) { if (vhost_vq_has_work(vq)) { *busyloop_intr = true; break; } if ((sock_has_rx_data(sock) && !vhost_vq_avail_empty(&net->dev, rvq)) || !vhost_vq_avail_empty(&net->dev, tvq)) break; cpu_relax(); } preempt_enable(); if (poll_rx || sock_has_rx_data(sock)) vhost_net_busy_poll_try_queue(net, vq); else if (!poll_rx) /* On tx here, sock has no rx data. */ vhost_enable_notify(&net->dev, rvq); mutex_unlock(&vq->mutex); } static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_net_virtqueue *tnvq, unsigned int *out_num, unsigned int *in_num, struct msghdr *msghdr, bool *busyloop_intr) { struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *rvq = &rnvq->vq; struct vhost_virtqueue *tvq = &tnvq->vq; int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); if (r == tvq->num && tvq->busyloop_timeout) { /* Flush batched packets first */ if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq))) vhost_tx_batch(net, tnvq, vhost_vq_get_backend(tvq), msghdr); vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); } return r; } static bool vhost_exceeds_maxpend(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; return (nvq->upend_idx + UIO_MAXIOV - nvq->done_idx) % UIO_MAXIOV > min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2); } static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, size_t hdr_size, int out) { /* Skip header. TODO: support TSO. */ size_t len = iov_length(vq->iov, out); iov_iter_init(iter, ITER_SOURCE, vq->iov, out, len); iov_iter_advance(iter, hdr_size); return iov_iter_count(iter); } static int get_tx_bufs(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct msghdr *msg, unsigned int *out, unsigned int *in, size_t *len, bool *busyloop_intr) { struct vhost_virtqueue *vq = &nvq->vq; int ret; ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr); if (ret < 0 || ret == vq->num) return ret; if (*in) { vq_err(vq, "Unexpected descriptor format for TX: out %d, int %d\n", *out, *in); return -EFAULT; } /* Sanity check */ *len = init_iov_iter(vq, &msg->msg_iter, nvq->vhost_hlen, *out); if (*len == 0) { vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n", *len, nvq->vhost_hlen); return -EFAULT; } return ret; } static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) { return total_len < VHOST_NET_WEIGHT && !vhost_vq_avail_empty(vq->dev, vq); } #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct iov_iter *from) { struct vhost_virtqueue *vq = &nvq->vq; struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; size_t len = iov_iter_count(from); int headroom = vhost_sock_xdp(sock) ? 
XDP_PACKET_HEADROOM : 0; int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + headroom + nvq->sock_hlen); int sock_hlen = nvq->sock_hlen; void *buf; int copied; int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; if (SKB_DATA_ALIGN(len + pad) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, SMP_CACHE_BYTES); if (unlikely(!buf)) return -ENOMEM; copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), sock_hlen, from); if (copied != sock_hlen) { ret = -EFAULT; goto err; } hdr = buf; gso = &hdr->gso; if (!sock_hlen) memset(buf, 0, pad); if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2 > vhost16_to_cpu(vq, gso->hdr_len)) { gso->hdr_len = cpu_to_vhost16(vq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); if (vhost16_to_cpu(vq, gso->hdr_len) > len) { ret = -EINVAL; goto err; } } len -= sock_hlen; copied = copy_from_iter(buf + pad, len, from); if (copied != len) { ret = -EFAULT; goto err; } xdp_init_buff(xdp, buflen, NULL); xdp_prepare_buff(xdp, buf, pad, len, true); hdr->buflen = buflen; ++nvq->batched_xdp; return 0; err: page_frag_free(buf); return ret; } static void handle_tx_copy(struct vhost_net *net, struct socket *sock) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in; int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; size_t len, total_len = 0; int err; int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); do { bool busyloop_intr = false; if (nvq->done_idx == VHOST_NET_BATCH) vhost_tx_batch(net, nvq, sock, &msg); head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, &busyloop_intr); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } total_len += len; /* For simplicity, TX batching is only enabled if * sndbuf is unlimited. */ if (sock_can_batch) { err = vhost_net_build_xdp(nvq, &msg.msg_iter); if (!err) { goto done; } else if (unlikely(err != -ENOSPC)) { vhost_tx_batch(net, nvq, sock, &msg); vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } /* We can't build XDP buff, go for single * packet path but let's flush batched * packets. 
*/ vhost_tx_batch(net, nvq, sock, &msg); msg.msg_control = NULL; } else { if (tx_can_batch(vq, total_len)) msg.msg_flags |= MSG_MORE; else msg.msg_flags &= ~MSG_MORE; } err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } pr_debug("Fail to send packet: err %d", err); } else if (unlikely(err != len)) pr_debug("Truncated TX packet: len %d != %zd\n", err, len); done: vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->done_idx].len = 0; ++nvq->done_idx; } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); vhost_tx_batch(net, nvq, sock, &msg); } static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in; int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; struct tun_msg_ctl ctl; size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *ubufs; struct ubuf_info_msgzc *ubuf; bool zcopy_used; int sent_pkts = 0; do { bool busyloop_intr; /* Release DMAs done buffers first */ vhost_zerocopy_signal_used(net, vq); busyloop_intr = false; head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, &busyloop_intr); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } zcopy_used = len >= VHOST_GOODCOPY_LEN && !vhost_exceeds_maxpend(net) && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { ubuf = nvq->ubuf_info + nvq->upend_idx; vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->ctx = nvq->ubufs; ubuf->desc = nvq->upend_idx; ubuf->ubuf.ops = &vhost_ubuf_ops; ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG; refcount_set(&ubuf->ubuf.refcnt, 1); msg.msg_control = &ctl; ctl.type = TUN_MSG_UBUF; ctl.ptr = &ubuf->ubuf; msg.msg_controllen = sizeof(ctl); ubufs = nvq->ubufs; atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } else { msg.msg_control = NULL; ubufs = NULL; } total_len += len; if (tx_can_batch(vq, total_len) && likely(!vhost_exceeds_maxpend(net))) { msg.msg_flags |= MSG_MORE; } else { msg.msg_flags &= ~MSG_MORE; } err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; if (zcopy_used) { if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) vhost_net_ubuf_put(ubufs); if (retry) nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; else vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; } if (retry) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; } pr_debug("Fail to send packet: err %d", err); } else if (unlikely(err != len)) pr_debug("Truncated TX packet: " " len %d != %zd\n", err, len); if (!zcopy_used) vhost_add_used_and_signal(&net->dev, vq, head, 0); else vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); } /* Expects to be always run from workqueue - which acts as * read-size critical 
section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; struct socket *sock; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); sock = vhost_vq_get_backend(vq); if (!sock) goto out; if (!vq_meta_prefetch(vq)) goto out; vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); if (vhost_sock_zcopy(sock)) handle_tx_zerocopy(net, sock); else handle_tx_copy(net, sock); out: mutex_unlock(&vq->mutex); } static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) { struct sk_buff *head; int len = 0; unsigned long flags; if (rvq->rx_ring) return vhost_net_buf_peek(rvq); spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { len = head->len; if (skb_vlan_tag_present(head)) len += VLAN_HLEN; } spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); return len; } static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, bool *busyloop_intr) { struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *rvq = &rnvq->vq; struct vhost_virtqueue *tvq = &tnvq->vq; int len = peek_head_len(rnvq, sk); if (!len && rvq->busyloop_timeout) { /* Flush batched heads first */ vhost_net_signal_used(rnvq); /* Both tx vq and rx socket were polled here */ vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true); len = peek_head_len(rnvq, sk); } return len; } /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue * @datalen - data length we'll be reading * @iovcount - returned count of io vectors we fill * @log - vhost log * @log_num - log offset * @quota - headcount quota, 1 for big buffer * returns number of buffer heads allocated, negative on error */ static int get_rx_bufs(struct vhost_virtqueue *vq, struct vring_used_elem *heads, int datalen, unsigned *iovcount, struct vhost_log *log, unsigned *log_num, unsigned int quota) { unsigned int out, in; int seg = 0; int headcount = 0; unsigned d; int r, nlogs = 0; /* len is always initialized before use since we are always called with * datalen > 0. */ u32 len; while (datalen > 0 && headcount < quota) { if (unlikely(seg >= UIO_MAXIOV)) { r = -ENOBUFS; goto err; } r = vhost_get_vq_desc(vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); if (unlikely(r < 0)) goto err; d = r; if (d == vq->num) { r = 0; goto err; } if (unlikely(out || in <= 0)) { vq_err(vq, "unexpected descriptor format for RX: " "out %d, in %d\n", out, in); r = -EINVAL; goto err; } if (unlikely(log)) { nlogs += *log_num; log += *log_num; } heads[headcount].id = cpu_to_vhost32(vq, d); len = iov_length(vq->iov + seg, in); heads[headcount].len = cpu_to_vhost32(vq, len); datalen -= len; ++headcount; seg += in; } heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); *iovcount = seg; if (unlikely(log)) *log_num = nlogs; /* Detect overrun */ if (unlikely(datalen > 0)) { r = UIO_MAXIOV + 1; goto err; } return headcount; err: vhost_discard_vq_desc(vq, headcount); return r; } /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. 
*/ static void handle_rx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned in, log; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, /* FIXME: get and handle RX aux data. */ .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; struct virtio_net_hdr hdr = { .flags = 0, .gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; int err, mergeable; s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; bool busyloop_intr = false; bool set_num_buffers; struct socket *sock; struct iov_iter fixup; __virtio16 num_buffers; int recv_pkts = 0; mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); sock = vhost_vq_get_backend(vq); if (!sock) goto out; if (!vq_meta_prefetch(vq)) goto out; vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ? vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); set_num_buffers = mergeable || vhost_has_feature(vq, VIRTIO_F_VERSION_1); do { sock_len = vhost_net_rx_peek_head_len(net, sock->sk, &busyloop_intr); if (!sock_len) break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) goto out; /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us * they refilled. */ goto out; } busyloop_intr = false; if (nvq->rx_ring) msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, 1, 1); err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, in, vhost_len); fixup = msg.msg_iter; if (unlikely((vhost_hlen))) { /* We will supply the header ourselves * TODO: support TSO. */ iov_iter_advance(&msg.msg_iter, vhost_hlen); } err = sock->ops->recvmsg(sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: * it's not supposed to do this usually, but might be hard * to prevent. Discard data we got (if any) and keep going. */ if (unlikely(err != sock_len)) { pr_debug("Discarded rx packet: " " len %d, expected %zd\n", err, sock_len); vhost_discard_vq_desc(vq, headcount); continue; } /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ if (unlikely(vhost_hlen)) { if (copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr " "at addr %p\n", vq->iov->iov_base); goto out; } } else { /* Header came from socket; we'll need to patch * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF */ iov_iter_advance(&fixup, sizeof(hdr)); } /* TODO: Should check and handle checksum. 
*/ num_buffers = cpu_to_vhost16(vq, headcount); if (likely(set_num_buffers) && copy_to_iter(&num_buffers, sizeof num_buffers, &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); goto out; } nvq->done_idx += headcount; if (nvq->done_idx > VHOST_NET_BATCH) vhost_net_signal_used(nvq); if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); if (unlikely(busyloop_intr)) vhost_poll_queue(&vq->poll); else if (!sock_len) vhost_net_enable_vq(net, vq); out: vhost_net_signal_used(nvq); mutex_unlock(&vq->mutex); } static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_tx(net); } static void handle_rx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_rx(net); } static void handle_tx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); handle_tx(net); } static void handle_rx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); handle_rx(net); } static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; void **queue; struct xdp_buff *xdp; int i; n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!n) return -ENOMEM; vqs = kmalloc_array(VHOST_NET_VQ_MAX, sizeof(*vqs), GFP_KERNEL); if (!vqs) { kvfree(n); return -ENOMEM; } queue = kmalloc_array(VHOST_NET_BATCH, sizeof(void *), GFP_KERNEL); if (!queue) { kfree(vqs); kvfree(n); return -ENOMEM; } n->vqs[VHOST_NET_VQ_RX].rxq.queue = queue; xdp = kmalloc_array(VHOST_NET_BATCH, sizeof(*xdp), GFP_KERNEL); if (!xdp) { kfree(vqs); kvfree(n); kfree(queue); return -ENOMEM; } n->vqs[VHOST_NET_VQ_TX].xdp = xdp; dev = &n->dev; vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].ubufs = NULL; n->vqs[i].ubuf_info = NULL; n->vqs[i].upend_idx = 0; n->vqs[i].done_idx = 0; n->vqs[i].batched_xdp = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; n->vqs[i].rx_ring = NULL; vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, vqs[VHOST_NET_VQ_TX]); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, vqs[VHOST_NET_VQ_RX]); f->private_data = n; page_frag_cache_init(&n->pf_cache); return 0; } static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct socket *sock; struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); mutex_lock(&vq->mutex); sock = vhost_vq_get_backend(vq); vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, NULL); vhost_net_buf_unproduce(nvq); nvq->rx_ring = NULL; mutex_unlock(&vq->mutex); return sock; } static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, struct 
socket **rx_sock) { *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } static void vhost_net_flush(struct vhost_net *n) { vhost_dev_flush(&n->dev); if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. */ vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1); mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } static int vhost_net_release(struct inode *inode, struct file *f) { struct vhost_net *n = f->private_data; struct socket *tx_sock; struct socket *rx_sock; vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); vhost_net_vq_reset(n); if (tx_sock) sockfd_put(tx_sock); if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); page_frag_cache_drain(&n->pf_cache); kvfree(n); return 0; } static struct socket *get_raw_socket(int fd) { int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) return ERR_PTR(-ENOTSOCK); /* Parameter checking */ if (sock->sk->sk_type != SOCK_RAW) { r = -ESOCKTNOSUPPORT; goto err; } if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } return sock; err: sockfd_put(sock); return ERR_PTR(r); } static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; ring = tap_get_ptr_ring(file); if (!IS_ERR(ring)) goto out; ring = NULL; out: return ring; } static struct socket *get_tap_socket(int fd) { struct file *file = fget(fd); struct socket *sock; if (!file) return ERR_PTR(-EBADF); sock = tun_get_socket(file); if (!IS_ERR(sock)) return sock; sock = tap_get_socket(file); if (IS_ERR(sock)) fput(file); return sock; } static struct socket *get_socket(int fd) { struct socket *sock; /* special case to disable backend */ if (fd == -1) return NULL; sock = get_raw_socket(fd); if (!IS_ERR(sock)) return sock; sock = get_tap_socket(fd); if (!IS_ERR(sock)) return sock; return ERR_PTR(-ENOTSOCK); } static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) { struct socket *sock, *oldsock; struct vhost_virtqueue *vq; struct vhost_net_virtqueue *nvq; struct vhost_net_ubuf_ref *ubufs, *oldubufs = NULL; int r; mutex_lock(&n->dev.mutex); r = vhost_dev_check_owner(&n->dev); if (r) goto err; if (index >= VHOST_NET_VQ_MAX) { r = -ENOBUFS; goto err; } vq = &n->vqs[index].vq; nvq = &n->vqs[index]; mutex_lock(&vq->mutex); if (fd == -1) vhost_clear_msg(&n->dev); /* Verify that ring has been setup correctly. 
*/ if (!vhost_vq_access_ok(vq)) { r = -EFAULT; goto err_vq; } sock = get_socket(fd); if (IS_ERR(sock)) { r = PTR_ERR(sock); goto err_vq; } /* start polling new socket */ oldsock = vhost_vq_get_backend(vq); if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); if (IS_ERR(ubufs)) { r = PTR_ERR(ubufs); goto err_ubufs; } vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, sock); vhost_net_buf_unproduce(nvq); r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); if (r) goto err_used; if (index == VHOST_NET_VQ_RX) { if (sock) nvq->rx_ring = get_tap_ptr_ring(sock->file); else nvq->rx_ring = NULL; } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; n->tx_packets = 0; n->tx_zcopy_err = 0; n->tx_flush = false; } mutex_unlock(&vq->mutex); if (oldubufs) { vhost_net_ubuf_put_wait_and_free(oldubufs); mutex_lock(&vq->mutex); vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); } if (oldsock) { vhost_dev_flush(&n->dev); sockfd_put(oldsock); } mutex_unlock(&n->dev.mutex); return 0; err_used: vhost_vq_set_backend(vq, oldsock); vhost_net_enable_vq(n, vq); if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: if (sock) sockfd_put(sock); err_vq: mutex_unlock(&vq->mutex); err: mutex_unlock(&n->dev.mutex); return r; } static long vhost_net_reset_owner(struct vhost_net *n) { struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; struct vhost_iotlb *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; umem = vhost_dev_reset_owner_prepare(); if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) sockfd_put(tx_sock); if (rx_sock) sockfd_put(rx_sock); return err; } static int vhost_net_set_features(struct vhost_net *n, u64 features) { size_t vhost_hlen, sock_hlen, hdr_len; int i; hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) ? 
sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr); if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { /* vhost provides vnet_hdr */ vhost_hlen = hdr_len; sock_hlen = 0; } else { /* socket provides vnet_hdr */ vhost_hlen = 0; sock_hlen = hdr_len; } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&n->dev)) goto out_unlock; if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { if (vhost_init_device_iotlb(&n->dev)) goto out_unlock; } for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; n->vqs[i].vhost_hlen = vhost_hlen; n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); } mutex_unlock(&n->dev.mutex); return 0; out_unlock: mutex_unlock(&n->dev.mutex); return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) { int r; mutex_lock(&n->dev.mutex); if (vhost_dev_has_owner(&n->dev)) { r = -EBUSY; goto out; } r = vhost_net_set_ubuf_info(n); if (r) goto out; r = vhost_dev_set_owner(&n->dev); if (r) vhost_net_clear_ubuf_info(n); vhost_net_flush(n); out: mutex_unlock(&n->dev.mutex); return r; } static long vhost_net_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { struct vhost_net *n = f->private_data; void __user *argp = (void __user *)arg; u64 __user *featurep = argp; struct vhost_vring_file backend; u64 features; int r; switch (ioctl) { case VHOST_NET_SET_BACKEND: if (copy_from_user(&backend, argp, sizeof backend)) return -EFAULT; return vhost_net_set_backend(n, backend.index, backend.fd); case VHOST_GET_FEATURES: features = VHOST_NET_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; if (features & ~VHOST_NET_FEATURES) return -EOPNOTSUPP; return vhost_net_set_features(n, features); case VHOST_GET_BACKEND_FEATURES: features = VHOST_NET_BACKEND_FEATURES; if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; return 0; case VHOST_SET_BACKEND_FEATURES: if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; if (features & ~VHOST_NET_BACKEND_FEATURES) return -EOPNOTSUPP; vhost_set_backend_features(&n->dev, features); return 0; case VHOST_RESET_OWNER: return vhost_net_reset_owner(n); case VHOST_SET_OWNER: return vhost_net_set_owner(n); default: mutex_lock(&n->dev.mutex); r = vhost_dev_ioctl(&n->dev, ioctl, argp); if (r == -ENOIOCTLCMD) r = vhost_vring_ioctl(&n->dev, ioctl, argp); else vhost_net_flush(n); mutex_unlock(&n->dev.mutex); return r; } } static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; int noblock = file->f_flags & O_NONBLOCK; return vhost_chr_read_iter(dev, to, noblock); } static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; return vhost_chr_write_iter(dev, from); } static __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait) { struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; return vhost_chr_poll(file, dev, wait); } static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, .read_iter = vhost_net_chr_read_iter, .write_iter = vhost_net_chr_write_iter, .poll = vhost_net_chr_poll, 
.unlocked_ioctl = vhost_net_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = vhost_net_open, .llseek = noop_llseek, }; static struct miscdevice vhost_net_misc = { .minor = VHOST_NET_MINOR, .name = "vhost-net", .fops = &vhost_net_fops, }; static int __init vhost_net_init(void) { if (experimental_zcopytx) vhost_net_enable_zcopy(VHOST_NET_VQ_TX); return misc_register(&vhost_net_misc); } module_init(vhost_net_init); static void __exit vhost_net_exit(void) { misc_deregister(&vhost_net_misc); } module_exit(vhost_net_exit); MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. Tsirkin"); MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR); MODULE_ALIAS("devname:vhost-net"); |
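/*
 * Illustrative sketch (not part of the driver above): the userspace side of the
 * ioctl interface handled by vhost_net_ioctl(). Everything here uses the public
 * UAPI from <linux/vhost.h>; vhost_net_attach_tap() and tap_fd are hypothetical
 * names, and the vring setup ioctls (VHOST_SET_VRING_NUM/ADDR/KICK/CALL), which
 * must succeed before VHOST_NET_SET_BACKEND can pass the vhost_vq_access_ok()
 * check, are omitted for brevity.
 */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vhost.h>

static int vhost_net_attach_tap(int tap_fd)
{
	struct vhost_vring_file backend;
	uint64_t features;
	int vhost_fd = open("/dev/vhost-net", O_RDWR);

	if (vhost_fd < 0)
		return -1;

	/* Become the owner of this vhost device (VHOST_SET_OWNER). */
	if (ioctl(vhost_fd, VHOST_SET_OWNER, NULL))
		goto err;

	/* Feature negotiation: a real caller would mask against what the
	 * guest accepted instead of acking everything offered. */
	if (ioctl(vhost_fd, VHOST_GET_FEATURES, &features) ||
	    ioctl(vhost_fd, VHOST_SET_FEATURES, &features))
		goto err;

	/* ... vring setup ioctls go here (omitted) ... */

	/* Attach the TAP fd to both queues; index 0 is RX and 1 is TX,
	 * matching VHOST_NET_VQ_RX/VHOST_NET_VQ_TX in the driver. */
	backend.fd = tap_fd;
	for (backend.index = 0; backend.index < 2; backend.index++)
		if (ioctl(vhost_fd, VHOST_NET_SET_BACKEND, &backend))
			goto err;

	return vhost_fd;
err:
	close(vhost_fd);
	return -1;
}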
/* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/signalfd.h * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * */ #ifndef _LINUX_SIGNALFD_H #define _LINUX_SIGNALFD_H #include <uapi/linux/signalfd.h> #include <linux/sched/signal.h> #ifdef CONFIG_SIGNALFD /* * Deliver the signal to listening signalfd. */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh))) wake_up(&tsk->sighand->signalfd_wqh); } extern void signalfd_cleanup(struct sighand_struct *sighand); #else /* CONFIG_SIGNALFD */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { } static inline void signalfd_cleanup(struct sighand_struct *sighand) { } #endif /* CONFIG_SIGNALFD */ #endif /* _LINUX_SIGNALFD_H */ |
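/*
 * Illustrative sketch (not part of the header above): the userspace consumer
 * that signalfd_notify() ends up waking. Blocked signals are read back as
 * struct signalfd_siginfo records from the descriptor returned by signalfd(2).
 * Plain glibc/UAPI calls only; error handling is abbreviated.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/signalfd.h>
#include <unistd.h>

int main(void)
{
	struct signalfd_siginfo ssi;
	sigset_t mask;
	int sfd;

	/* Block SIGINT so it is delivered via the signalfd, not a handler. */
	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, SFD_CLOEXEC);
	if (sfd < 0)
		return 1;

	/* read() blocks until a blocked signal becomes pending; on the kernel
	 * side that is the wakeup issued by signalfd_notify() above. */
	if (read(sfd, &ssi, sizeof(ssi)) == sizeof(ssi))
		printf("got signal %u\n", ssi.ssi_signo);

	close(sfd);
	return 0;
}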
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_ENTRYKVM_H #define __LINUX_ENTRYKVM_H #include <linux/static_call_types.h> #include <linux/resume_user_mode.h> #include <linux/syscalls.h> #include <linux/seccomp.h> #include <linux/sched.h> #include <linux/tick.h> /* Transfer to guest mode work */ #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK #ifndef ARCH_XFER_TO_GUEST_MODE_WORK # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif #define XFER_TO_GUEST_MODE_WORK \ (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. * @vcpu: Pointer to current's VCPU data * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() * * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. */ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, unsigned long ti_work) { return 0; } #endif /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that * need to be handled while IRQs are disabled * upon entering to guest. * * Has to be invoked with interrupts disabled before the last call * to xfer_to_guest_mode_work_pending(). */ static inline void xfer_to_guest_mode_prepare(void) { lockdep_assert_irqs_disabled(); tick_nohz_user_enter_prepare(); } /** * __xfer_to_guest_mode_work_pending - Check if work is pending * * Returns: True if work pending, False otherwise. * * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from * interrupt enabled code for racy quick checks with care. */ static inline bool __xfer_to_guest_mode_work_pending(void) { unsigned long ti_work = read_thread_flags(); return !!(ti_work & XFER_TO_GUEST_MODE_WORK); } /** * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be * handled before returning to guest mode * * Returns: True if work pending, False otherwise. * * Has to be invoked with interrupts disabled before the transition to * guest mode. */ static inline bool xfer_to_guest_mode_work_pending(void) { lockdep_assert_irqs_disabled(); return __xfer_to_guest_mode_work_pending(); } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ #endif |
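/*
 * Illustrative sketch (not part of the header above): the calling pattern the
 * kerneldoc describes, roughly as a hypervisor's vcpu run loop would use it.
 * my_vcpu_run_once() and my_enter_guest() are hypothetical stand-ins for the
 * architecture's real entry path, and the -EAGAIN "retry" convention is an
 * assumption of this sketch, not something the header mandates.
 */
#include <linux/irqflags.h>
#include <linux/entry-kvm.h>

static int my_enter_guest(struct kvm_vcpu *vcpu)
{
	return 0;	/* hypothetical: the real arch guest entry goes here */
}

static int my_vcpu_run_once(struct kvm_vcpu *vcpu)
{
	int ret;

	/* With IRQs still enabled, drain TIF work (resched, signals, ...). */
	if (__xfer_to_guest_mode_work_pending()) {
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			return ret;	/* e.g. -EINTR: return to userspace */
	}

	local_irq_disable();
	xfer_to_guest_mode_prepare();

	/* Final IRQs-off check; if work slipped in, back out and retry. */
	if (xfer_to_guest_mode_work_pending()) {
		local_irq_enable();
		return -EAGAIN;
	}

	ret = my_enter_guest(vcpu);
	local_irq_enable();
	return ret;
}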
// SPDX-License-Identifier: GPL-2.0 /* * Framework for userspace DMA-BUF allocations * * Copyright (C) 2011 Google, Inc. * Copyright (C) 2019 Linaro Ltd. */ #include <linux/cdev.h> #include <linux/device.h> #include <linux/dma-buf.h> #include <linux/dma-heap.h> #include <linux/err.h> #include <linux/list.h> #include <linux/nospec.h> #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/xarray.h> #include <uapi/linux/dma-heap.h> #define DEVNAME "dma_heap" #define NUM_HEAP_MINORS 128 /** * struct dma_heap - represents a dmabuf heap in the system * @name: used for debugging/device-node name * @ops: ops struct for this heap * @priv: private data for this heap * @heap_devt: heap device node * @list: list head connecting to list of heaps * @heap_cdev: heap char device * * Represents a heap of memory from which buffers can be made. */ struct dma_heap { const char *name; const struct dma_heap_ops *ops; void *priv; dev_t heap_devt; struct list_head list; struct cdev heap_cdev; }; static LIST_HEAD(heap_list); static DEFINE_MUTEX(heap_list_lock); static dev_t dma_heap_devt; static struct class *dma_heap_class; static DEFINE_XARRAY_ALLOC(dma_heap_minors); static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, u32 fd_flags, u64 heap_flags) { struct dma_buf *dmabuf; int fd; /* * Allocations from all heaps have to begin * and end on page boundaries.
*/ len = PAGE_ALIGN(len); if (!len) return -EINVAL; dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags); if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); fd = dma_buf_fd(dmabuf, fd_flags); if (fd < 0) { dma_buf_put(dmabuf); /* just return, as put will call release and that will free */ } return fd; } static int dma_heap_open(struct inode *inode, struct file *file) { struct dma_heap *heap; heap = xa_load(&dma_heap_minors, iminor(inode)); if (!heap) { pr_err("dma_heap: minor %d unknown.\n", iminor(inode)); return -ENODEV; } /* instance data as context */ file->private_data = heap; nonseekable_open(inode, file); return 0; } static long dma_heap_ioctl_allocate(struct file *file, void *data) { struct dma_heap_allocation_data *heap_allocation = data; struct dma_heap *heap = file->private_data; int fd; if (heap_allocation->fd) return -EINVAL; if (heap_allocation->fd_flags & ~DMA_HEAP_VALID_FD_FLAGS) return -EINVAL; if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS) return -EINVAL; fd = dma_heap_buffer_alloc(heap, heap_allocation->len, heap_allocation->fd_flags, heap_allocation->heap_flags); if (fd < 0) return fd; heap_allocation->fd = fd; return 0; } static unsigned int dma_heap_ioctl_cmds[] = { DMA_HEAP_IOCTL_ALLOC, }; static long dma_heap_ioctl(struct file *file, unsigned int ucmd, unsigned long arg) { char stack_kdata[128]; char *kdata = stack_kdata; unsigned int kcmd; unsigned int in_size, out_size, drv_size, ksize; int nr = _IOC_NR(ucmd); int ret = 0; if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds)) return -EINVAL; nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds)); /* Get the kernel ioctl cmd that matches */ kcmd = dma_heap_ioctl_cmds[nr]; /* Figure out the delta between user cmd size and kernel cmd size */ drv_size = _IOC_SIZE(kcmd); out_size = _IOC_SIZE(ucmd); in_size = out_size; if ((ucmd & kcmd & IOC_IN) == 0) in_size = 0; if ((ucmd & kcmd & IOC_OUT) == 0) out_size = 0; ksize = max(max(in_size, out_size), drv_size); /* If necessary, allocate buffer for ioctl argument */ if (ksize > sizeof(stack_kdata)) { kdata = kmalloc(ksize, GFP_KERNEL); if (!kdata) return -ENOMEM; } if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { ret = -EFAULT; goto err; } /* zero out any difference between the kernel/user structure size */ if (ksize > in_size) memset(kdata + in_size, 0, ksize - in_size); switch (kcmd) { case DMA_HEAP_IOCTL_ALLOC: ret = dma_heap_ioctl_allocate(file, kdata); break; default: ret = -ENOTTY; goto err; } if (copy_to_user((void __user *)arg, kdata, out_size) != 0) ret = -EFAULT; err: if (kdata != stack_kdata) kfree(kdata); return ret; } static const struct file_operations dma_heap_fops = { .owner = THIS_MODULE, .open = dma_heap_open, .unlocked_ioctl = dma_heap_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = dma_heap_ioctl, #endif }; /** * dma_heap_get_drvdata - get per-heap driver data * @heap: DMA-Heap to retrieve private data for * * Returns: * The per-heap data for the heap. */ void *dma_heap_get_drvdata(struct dma_heap *heap) { return heap->priv; } /** * dma_heap_get_name - get heap name * @heap: DMA-Heap to retrieve the name of * * Returns: * The char* for the heap name. 
*/ const char *dma_heap_get_name(struct dma_heap *heap) { return heap->name; } /** * dma_heap_add - adds a heap to dmabuf heaps * @exp_info: information needed to register this heap */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) { struct dma_heap *heap, *h, *err_ret; struct device *dev_ret; unsigned int minor; int ret; if (!exp_info->name || !strcmp(exp_info->name, "")) { pr_err("dma_heap: Cannot add heap without a name\n"); return ERR_PTR(-EINVAL); } if (!exp_info->ops || !exp_info->ops->allocate) { pr_err("dma_heap: Cannot add heap with invalid ops struct\n"); return ERR_PTR(-EINVAL); } heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); heap->name = exp_info->name; heap->ops = exp_info->ops; heap->priv = exp_info->priv; /* Find unused minor number */ ret = xa_alloc(&dma_heap_minors, &minor, heap, XA_LIMIT(0, NUM_HEAP_MINORS - 1), GFP_KERNEL); if (ret < 0) { pr_err("dma_heap: Unable to get minor number for heap\n"); err_ret = ERR_PTR(ret); goto err0; } /* Create device */ heap->heap_devt = MKDEV(MAJOR(dma_heap_devt), minor); cdev_init(&heap->heap_cdev, &dma_heap_fops); ret = cdev_add(&heap->heap_cdev, heap->heap_devt, 1); if (ret < 0) { pr_err("dma_heap: Unable to add char device\n"); err_ret = ERR_PTR(ret); goto err1; } dev_ret = device_create(dma_heap_class, NULL, heap->heap_devt, NULL, heap->name); if (IS_ERR(dev_ret)) { pr_err("dma_heap: Unable to create device\n"); err_ret = ERR_CAST(dev_ret); goto err2; } mutex_lock(&heap_list_lock); /* check the name is unique */ list_for_each_entry(h, &heap_list, list) { if (!strcmp(h->name, exp_info->name)) { mutex_unlock(&heap_list_lock); pr_err("dma_heap: Already registered heap named %s\n", exp_info->name); err_ret = ERR_PTR(-EINVAL); goto err3; } } /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; err3: device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: xa_erase(&dma_heap_minors, minor); err0: kfree(heap); return err_ret; } static char *dma_heap_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev)); } static int dma_heap_init(void) { int ret; ret = alloc_chrdev_region(&dma_heap_devt, 0, NUM_HEAP_MINORS, DEVNAME); if (ret) return ret; dma_heap_class = class_create(DEVNAME); if (IS_ERR(dma_heap_class)) { unregister_chrdev_region(dma_heap_devt, NUM_HEAP_MINORS); return PTR_ERR(dma_heap_class); } dma_heap_class->devnode = dma_heap_devnode; return 0; } subsys_initcall(dma_heap_init); |
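/*
 * Illustrative sketch (not part of the framework above): allocating a dma-buf
 * from userspace through the DMA_HEAP_IOCTL_ALLOC path implemented by
 * dma_heap_ioctl_allocate(). Uses only the UAPI in <linux/dma-heap.h>;
 * alloc_from_heap() is a hypothetical helper, and the "/dev/dma_heap/system"
 * node name depends on which heaps the platform actually registered.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dma-heap.h>

/* Returns a dma-buf fd backing roughly 'len' bytes, or -1 on error. */
static int alloc_from_heap(const char *heap_path, __u64 len)
{
	struct dma_heap_allocation_data data;
	int heap_fd, ret;

	heap_fd = open(heap_path, O_RDONLY | O_CLOEXEC);
	if (heap_fd < 0)
		return -1;

	memset(&data, 0, sizeof(data));		/* data.fd must be 0 on input */
	data.len = len;				/* page-aligned by dma_heap_buffer_alloc() */
	data.fd_flags = O_RDWR | O_CLOEXEC;	/* within DMA_HEAP_VALID_FD_FLAGS */
	data.heap_flags = 0;			/* no heap flags are currently valid */

	ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
	close(heap_fd);
	return ret ? -1 : (int)data.fd;
}

/* e.g.: int buf_fd = alloc_from_heap("/dev/dma_heap/system", 1 << 20); */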
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * Copyright (c) 2002-2003 Intel Corp. * * This file is part of the SCTP kernel implementation * * SCTP over IPv6. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Le Yanqun <yanqun.le@nokia.com> * Hui Huang <hui.huang@nokia.com> * La Monte H.P.
Yarroll <piggy@acm.org> * Sridhar Samudrala <sri@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * * Based on: * linux/net/ipv6/tcp_ipv6.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/ipsec.h> #include <linux/slab.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/seq_file.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/inet_common.h> #include <net/inet_ecn.h> #include <net/sctp/sctp.h> #include <net/udp_tunnel.h> #include <linux/uaccess.h> static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2); static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, __be16 port); static int sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2); /* Event handler for inet6 address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same * time and thus corrupt the list. * The reader side is protected with RCU. */ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { case NETDEV_UP: addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr) && addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { found = 1; addr->valid = 0; list_del_rcu(&addr->list); sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; } return NOTIFY_DONE; } static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inet6addr_event, }; static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, __u8 type, __u8 code, __u32 info) { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; int err = 0; switch (type) { case ICMPV6_PKT_TOOBIG: if (ip6_sk_accept_pmtu(sk)) sctp_icmp_frag_needed(sk, asoc, t, info); return; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { sctp_icmp_proto_unreachable(sk, asoc, t); return; } break; case NDISC_REDIRECT: sctp_icmp_redirect(sk, t, skb); return; default: break; } icmpv6_err_convert(type, code, &err); if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } } /* ICMP error handler. 
*/ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct sctp_transport *transport; struct sctp_association *asoc; __u16 saveip, savesctp; struct sock *sk; /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, offset); sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original pointers. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } sctp_v6_err_handle(transport, skb, type, code, ntohl(info)); sctp_err_finish(sk, transport); return 0; } int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct sctp_association *asoc; struct sctp_transport *t; struct icmp6hdr *hdr; __u32 info = 0; skb->transport_header += sizeof(struct udphdr); sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } skb->transport_header -= sizeof(struct udphdr); hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr)); if (hdr->icmp6_type == NDISC_REDIRECT) { /* can't be handled without outer ip6hdr known, leave it to udpv6_err */ sctp_err_finish(sk, t); return 0; } if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG) info = ntohl(hdr->icmp6_mtu); sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info); sctp_err_finish(sk, t); return 1; } static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) { struct dst_entry *dst = dst_clone(t->dst); struct flowi6 *fl6 = &t->fl.u.ip6; struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); __u8 tclass = np->tclass; __be32 label; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); if (t->dscp & SCTP_DSCP_SET_MASK) tclass = t->dscp & SCTP_DSCP_VAL_MASK; if (INET_ECN_is_capable(tclass)) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(t->param_flags & SPP_PMTUD_ENABLE)) skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!t->encap_port || !sctp_sk(sk)->udp_port) { int res; skb_dst_set(skb, dst); rcu_read_lock(); res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } if (skb_is_gso(skb)) skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; skb->encapsulation = 1; skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr, tclass, ip6_dst_hoplimit(dst), label, sctp_sk(sk)->udp_port, t->encap_port, false); } /* Returns the dst cache entry for the given source and destination ip * addresses. 
*/ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct flowi *fl, struct sock *sk) { struct sctp_association *asoc = t->asoc; struct dst_entry *dst = NULL; struct flowi _fl; struct flowi6 *fl6 = &_fl.u.ip6; struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; enum sctp_scope scope; __u8 matchlen = 0; memset(&_fl, 0, sizeof(_fl)); fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl6->flowi6_oif = daddr->v6.sin6_scope_id; else if (asoc) fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if; if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); if (inet6_test_bit(SNDFLOW, sk) && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) goto out; fl6_sock_release(flowlabel); } pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr); if (asoc) fl6->fl6_sport = htons(asoc->base.bind_addr.port); if (saddr) { fl6->saddr = saddr->v6.sin6_addr; if (!fl6->fl6_sport) fl6->fl6_sport = saddr->v6.sin6_port; pr_debug("src=%pI6 - ", &fl6->saddr); } rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (!asoc || saddr) { t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); goto out; } bp = &asoc->base.bind_addr; scope = sctp_scope(daddr); /* ip6_dst_lookup has filled in the fl6->saddr for us. Check * to see if we can use it. */ if (!IS_ERR(dst)) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || (laddr->state != SCTP_ADDR_SRC && !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ if ((laddr->a.sa.sa_family == AF_INET6) && (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) { rcu_read_unlock(); t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); goto out; } } rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. */ dst_release(dst); dst = NULL; } /* Walk through the bind address list and try to get the * best source address for a given destination. 
*/ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { struct dst_entry *bdst; __u8 bmatchlen; if (!laddr->valid || laddr->state != SCTP_ADDR_SRC || laddr->a.sa.sa_family != AF_INET6 || scope > sctp_scope(&laddr->a)) continue; fl6->saddr = laddr->a.v6.sin6_addr; fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); bdst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(bdst)) continue; if (ipv6_chk_addr(dev_net(bdst->dev), &laddr->a.v6.sin6_addr, bdst->dev, 1)) { if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); break; } bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (matchlen > bmatchlen) { dst_release(bdst); continue; } if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; matchlen = bmatchlen; t->dst = dst; memcpy(fl, &_fl, sizeof(_fl)); } rcu_read_unlock(); out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; rt = dst_rt6_info(dst); t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, &fl->u.ip6.saddr); } else { t->dst = NULL; pr_debug("no route\n"); } } /* Returns the number of consecutive initial bits that match in the 2 ipv6 * addresses. */ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, union sctp_addr *s2) { return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr); } /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ static void sctp_v6_get_saddr(struct sctp_sock *sk, struct sctp_transport *t, struct flowi *fl) { struct flowi6 *fl6 = &fl->u.ip6; union sctp_addr *saddr = &t->saddr; pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); if (t->dst) { saddr->v6.sin6_family = AF_INET6; saddr->v6.sin6_addr = fl6->saddr; } } /* Make a copy of all potential local addresses. */ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct net_device *dev) { struct inet6_dev *in6_dev; struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; rcu_read_lock(); if ((in6_dev = __in6_dev_get(dev)) == NULL) { rcu_read_unlock(); return; } read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); list_add_tail(&addr->list, addrlist); } } read_unlock_bh(&in6_dev->lock); rcu_read_unlock(); } /* Copy over any ip options */ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk) { struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct ipv6_txoptions *opt; newnp = inet6_sk(newsk); rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); if (!opt) pr_err("%s: Failed to copy ip options\n", __func__); } RCU_INIT_POINTER(newnp->opt, opt); rcu_read_unlock(); } /* Account for the IP options */ static int sctp_v6_ip_options_len(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt; int len = 0; rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) len = opt->opt_flen + opt->opt_nflen; rcu_read_unlock(); return len; } /* Initialize a sockaddr_storage from in incoming skb. 
*/ static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { /* Always called on head skb, so this is safe */ struct sctphdr *sh = sctp_hdr(skb); struct sockaddr_in6 *sa = &addr->v6; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; /* FIXME */ addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif; if (is_saddr) { sa->sin6_port = sh->source; sa->sin6_addr = ipv6_hdr(skb)->saddr; } else { sa->sin6_port = sh->dest; sa->sin6_addr = ipv6_hdr(skb)->daddr; } } /* Initialize an sctp_addr from a socket. */ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET) { sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); sk->sk_v6_rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { sk->sk_v6_rcv_saddr = addr->v6.sin6_addr; } } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET) { sk->sk_v6_daddr.s6_addr32[0] = 0; sk->sk_v6_daddr.s6_addr32[1] = 0; sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { sk->sk_v6_daddr = addr->v6.sin6_addr; } } /* Initialize a sctp_addr from an address parameter. */ static bool sctp_v6_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, __be16 port, int iif) { if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param)) return false; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; /* BUG */ addr->v6.sin6_addr = param->v6.addr; addr->v6.sin6_scope_id = iif; return true; } /* Initialize an address parameter from a sctp_addr and return the length * of the address parameter. */ static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); param->v6.addr = addr->v6.sin6_addr; return length; } /* Initialize a sctp_addr from struct in6_addr. */ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, __be16 port) { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; addr->v6.sin6_scope_id = 0; } static int __sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2) { if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr2->v6.sin6_addr) && addr2->v6.sin6_addr.s6_addr32[3] == addr1->v4.sin_addr.s_addr) return 1; if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr1->v6.sin6_addr) && addr1->v6.sin6_addr.s6_addr32[3] == addr2->v4.sin_addr.s_addr) return 1; return 0; } if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) return 0; /* If this is a linklocal address, compare the scope_id. */ if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) && addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id && addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id) return 0; return 1; } /* Compare addresses exactly. 
* v4-mapped-v6 is also in consideration. */ static int sctp_v6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2) { return __sctp_v6_cmp_addr(addr1, addr2) && addr1->v6.sin6_port == addr2->v6.sin6_port; } /* Initialize addr struct to INADDR_ANY. */ static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port) { memset(addr, 0x00, sizeof(union sctp_addr)); addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; } /* Is this a wildcard address? */ static int sctp_v6_is_any(const union sctp_addr *addr) { return ipv6_addr_any(&addr->v6.sin6_addr); } /* Should this be available for binding? */ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr; struct sock *sk = &sp->inet.sk; struct net *net = sock_net(sk); struct net_device *dev = NULL; int type, res, bound_dev_if; type = ipv6_addr_type(in6); if (IPV6_ADDR_ANY == type) return 1; if (type == IPV6_ADDR_MAPPED) { if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); return sctp_get_af_specific(AF_INET)->available(addr, sp); } if (!(type & IPV6_ADDR_UNICAST)) return 0; rcu_read_lock(); bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (bound_dev_if) { res = 0; dev = dev_get_by_index_rcu(net, bound_dev_if); if (!dev) goto out; } res = ipv6_can_nonlocal_bind(net, &sp->inet) || ipv6_chk_addr(net, in6, dev, 0); out: rcu_read_unlock(); return res; } /* This function checks if the address is a valid address to be used for * SCTP. * * Output: * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp, const struct sk_buff *skb) { int ret = ipv6_addr_type(&addr->v6.sin6_addr); /* Support v4-mapped-v6 address. */ if (ret == IPV6_ADDR_MAPPED) { /* Note: This routine is used in input, so v4-mapped-v6 * are disallowed here when there is no sctp_sock. */ if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); return sctp_get_af_specific(AF_INET)->addr_valid(addr, sp, skb); } /* Is this a non-unicast address */ if (!(ret & IPV6_ADDR_UNICAST)) return 0; return 1; } /* What is the scope of 'addr'? */ static enum sctp_scope sctp_v6_scope(union sctp_addr *addr) { enum sctp_scope retval; int v6scope; /* The IPv6 scope is really a set of bit fields. * See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope. */ v6scope = ipv6_addr_scope(&addr->v6.sin6_addr); switch (v6scope) { case IFA_HOST: retval = SCTP_SCOPE_LOOPBACK; break; case IFA_LINK: retval = SCTP_SCOPE_LINK; break; case IFA_SITE: retval = SCTP_SCOPE_PRIVATE; break; default: retval = SCTP_SCOPE_GLOBAL; break; } return retval; } /* Create and initialize a new sk for the socket to be returned by accept(). 
*/ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sctp_association *asoc, bool kern) { struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) goto out; sock_init_data(NULL, newsk); sctp_copy_sock(newsk, sk, asoc); sock_reset_flag(sk, SOCK_ZAPPED); newsctp6sk = (struct sctp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; sctp_v6_copy_ip_options(sk, newsk); /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() * and getpeername(). */ sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); newsk = NULL; } out: return newsk; } /* Format a sockaddr for return to user space. This makes sure the return is * AF_INET or AF_INET6 depending on the SCTP_I_WANT_MAPPED_V4_ADDR option. */ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { if (sp->v4mapped) { if (addr->sa.sa_family == AF_INET) sctp_v4_map_v6(addr); } else { if (addr->sa.sa_family == AF_INET6 && ipv6_addr_v4mapped(&addr->v6.sin6_addr)) sctp_v6_map_v4(addr); } if (addr->sa.sa_family == AF_INET) { memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } return sizeof(struct sockaddr_in6); } /* Where did this skb come from? */ static int sctp_v6_skb_iif(const struct sk_buff *skb) { return inet6_iif(skb); } static int sctp_v6_skb_sdif(const struct sk_buff *skb) { return inet6_sdif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr); } static void sctp_v6_ecn_capable(struct sock *sk) { inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } /* Initialize a PF_INET msgname from a ulpevent. */ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event, char *msgname, int *addrlen) { union sctp_addr *addr; struct sctp_association *asoc; union sctp_addr *paddr; if (!msgname) return; addr = (union sctp_addr *)msgname; asoc = event->asoc; paddr = &asoc->peer.primary_addr; if (paddr->sa.sa_family == AF_INET) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = htons(asoc->peer.port); addr->v4.sin_addr = paddr->v4.sin_addr; } else { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; if (ipv6_addr_type(&paddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = paddr->v6.sin6_scope_id; else addr->v6.sin6_scope_id = 0; addr->v6.sin6_port = htons(asoc->peer.port); addr->v6.sin6_addr = paddr->v6.sin6_addr; } *addrlen = sctp_v6_addr_to_user(sctp_sk(asoc->base.sk), addr); } /* Initialize a msg_name from an inbound skb. 
*/ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, int *addr_len) { union sctp_addr *addr; struct sctphdr *sh; if (!msgname) return; addr = (union sctp_addr *)msgname; sh = sctp_hdr(skb); if (ip_hdr(skb)->version == 4) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = sh->source; addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr; } else { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_port = sh->source; addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); else addr->v6.sin6_scope_id = 0; } *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); } /* Do we support this AF? */ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) { switch (family) { case AF_INET6: return 1; /* v4-mapped-v6 addresses */ case AF_INET: if (!ipv6_only_sock(sctp_opt2sk(sp))) return 1; fallthrough; default: return 0; } } /* Address matching with wildcards allowed. This extra level * of indirection lets us choose whether a PF_INET6 should * disallow any v4 addresses if we so choose. */ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, struct sctp_sock *opt) { struct sock *sk = sctp_opt2sk(opt); struct sctp_af *af1, *af2; af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); if (!af1 || !af2) return 0; /* If the socket is IPv6 only, v4 addrs will not match */ if (ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET) return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr; return __sctp_v6_cmp_addr(addr1, addr2); } /* Verify that the provided sockaddr looks bindable. Common verification, * has already been taken care of. */ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af; /* ASSERT: address family has already been verified. */ if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { int type = ipv6_addr_type(&addr->v6.sin6_addr); struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { struct net *net; if (!addr->v6.sin6_scope_id) return 0; net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); if (!dev || !(ipv6_can_nonlocal_bind(net, &opt->inet) || ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0))) { rcu_read_unlock(); return 0; } rcu_read_unlock(); } af = opt->pf->af; } return af->available(addr, opt); } /* Verify that the provided sockaddr looks sendable. Common verification, * has already been taken care of. */ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af = NULL; /* ASSERT: address family has already been verified. */ if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { int type = ipv6_addr_type(&addr->v6.sin6_addr); struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk), addr->v6.sin6_scope_id); rcu_read_unlock(); if (!dev) return 0; } af = opt->pf->af; } return af != NULL; } /* Fill in Supported Address Type information for INIT and INIT-ACK * chunks. 
Note: In the future, we may want to look at sock options * to determine whether a PF_INET6 socket really wants to have IPV4 * addresses. * Returns number of addresses supported. */ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, __be16 *types) { types[0] = SCTP_PARAM_IPV6_ADDRESS; if (!opt || !ipv6_only_sock(sctp_opt2sk(opt))) { types[1] = SCTP_PARAM_IPV4_ADDRESS; return 2; } return 1; } /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */ static int sctp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int rc; rc = inet6_getname(sock, uaddr, peer); if (rc < 0) return rc; rc = sctp_v6_addr_to_user(sctp_sk(sock->sk), (union sctp_addr *)uaddr); return rc; } static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, .poll = sctp_poll, .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = sctp_inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static struct inet_protosw sctpv6_seqpacket_protosw = { .type = SOCK_SEQPACKET, .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctpv6_stream_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, .flags = SCTP_PROTOSW_FLAG, }; static int sctp6_rcv(struct sk_buff *skb) { SCTP_INPUT_CB(skb)->encap_port = 0; return sctp_rcv(skb) ? -1 : 0; } static const struct inet6_protocol sctpv6_protocol = { .handler = sctp6_rcv, .err_handler = sctp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; static struct sctp_af sctp_af_inet6 = { .sa_family = AF_INET6, .sctp_xmit = sctp_v6_xmit, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .get_dst = sctp_v6_get_dst, .get_saddr = sctp_v6_get_saddr, .copy_addrlist = sctp_v6_copy_addrlist, .from_skb = sctp_v6_from_skb, .from_sk = sctp_v6_from_sk, .from_addr_param = sctp_v6_from_addr_param, .to_addr_param = sctp_v6_to_addr_param, .cmp_addr = sctp_v6_cmp_addr, .scope = sctp_v6_scope, .addr_valid = sctp_v6_addr_valid, .inaddr_any = sctp_v6_inaddr_any, .is_any = sctp_v6_is_any, .available = sctp_v6_available, .skb_iif = sctp_v6_skb_iif, .skb_sdif = sctp_v6_skb_sdif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), .ip_options_len = sctp_v6_ip_options_len, }; static struct sctp_pf sctp_pf_inet6 = { .event_msgname = sctp_inet6_event_msgname, .skb_msgname = sctp_inet6_skb_msgname, .af_supported = sctp_inet6_af_supported, .cmp_addr = sctp_inet6_cmp_addr, .bind_verify = sctp_inet6_bind_verify, .send_verify = sctp_inet6_send_verify, .supported_addrs = sctp_inet6_supported_addrs, .create_accept_sk = sctp_v6_create_accept_sk, .addr_to_user = sctp_v6_addr_to_user, .to_sk_saddr = sctp_v6_to_sk_saddr, .to_sk_daddr = sctp_v6_to_sk_daddr, .copy_ip_options = sctp_v6_copy_ip_options, .af = &sctp_af_inet6, }; /* Initialize IPv6 support and register with socket layer. */ void sctp_v6_pf_init(void) { /* Register the SCTP specific PF_INET6 functions. 
*/ sctp_register_pf(&sctp_pf_inet6, PF_INET6); /* Register the SCTP specific AF_INET6 functions. */ sctp_register_af(&sctp_af_inet6); } void sctp_v6_pf_exit(void) { list_del(&sctp_af_inet6.list); } /* Initialize IPv6 support and register with socket layer. */ int sctp_v6_protosw_init(void) { int rc; rc = proto_register(&sctpv6_prot, 1); if (rc) return rc; /* Add SCTPv6 (UDP and TCP style) to inetsw6 linked list. */ inet6_register_protosw(&sctpv6_seqpacket_protosw); inet6_register_protosw(&sctpv6_stream_protosw); return 0; } void sctp_v6_protosw_exit(void) { inet6_unregister_protosw(&sctpv6_seqpacket_protosw); inet6_unregister_protosw(&sctpv6_stream_protosw); proto_unregister(&sctpv6_prot); } /* Register with inet6 layer. */ int sctp_v6_add_protocol(void) { /* Register notifier for inet6 address additions/deletions. */ register_inet6addr_notifier(&sctp_inet6addr_notifier); if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) return -EAGAIN; return 0; } /* Unregister with inet6 layer. */ void sctp_v6_del_protocol(void) { inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); unregister_inet6addr_notifier(&sctp_inet6addr_notifier); }
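/*
 * Illustrative userspace sketch, not part of the kernel source above,
 * assuming a kernel with SCTP support: it exercises the PF_INET6 paths
 * registered by sctp_v6_pf_init()/sctp_v6_protosw_init().  The socket is
 * the one-to-many style served by sctpv6_seqpacket_protosw; clearing
 * IPV6_V6ONLY lets sctp_inet6_af_supported() keep accepting v4-mapped
 * peers, and the link-local bind with an explicit sin6_scope_id is the
 * case sctp_inet6_bind_verify() checks.  "eth0", the address and the
 * port below are placeholders.
 */
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in6 a;
	int off = 0;
	int fd;

	fd = socket(AF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Not v6-only, so AF_INET (v4-mapped) addresses stay acceptable. */
	if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)) < 0)
		perror("IPV6_V6ONLY");

	memset(&a, 0, sizeof(a));
	a.sin6_family = AF_INET6;
	a.sin6_port = htons(5000);
	inet_pton(AF_INET6, "fe80::1", &a.sin6_addr);	/* placeholder address */
	a.sin6_scope_id = if_nametoindex("eth0");	/* placeholder interface */

	if (bind(fd, (struct sockaddr *)&a, sizeof(a)) < 0)
		perror("bind");

	close(fd);
	return 0;
}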
/* SPDX-License-Identifier: GPL-2.0 */ /*
* linux/cgroup-defs.h - basic definitions for cgroup * * This file provides basic type and interface. Include this file directly * only if necessary to avoid cyclic dependencies. */ #ifndef _LINUX_CGROUP_DEFS_H #define _LINUX_CGROUP_DEFS_H #include <linux/limits.h> #include <linux/list.h> #include <linux/idr.h> #include <linux/wait.h> #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> #include <linux/u64_stats_sync.h> #include <linux/workqueue.h> #include <linux/bpf-cgroup-defs.h> #include <linux/psi_types.h> #ifdef CONFIG_CGROUPS struct cgroup; struct cgroup_root; struct cgroup_subsys; struct cgroup_taskset; struct kernfs_node; struct kernfs_ops; struct kernfs_open_file; struct seq_file; struct poll_table_struct; #define MAX_CGROUP_TYPE_NAMELEN 32 #define MAX_CGROUP_ROOT_NAMELEN 64 #define MAX_CFTYPE_NAME 64 /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, enum cgroup_subsys_id { #include <linux/cgroup_subsys.h> CGROUP_SUBSYS_COUNT, }; #undef SUBSYS /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ CSS_VISIBLE = (1 << 3), /* css is visible to userland */ CSS_DYING = (1 << 4), /* css is dying */ }; /* bits in struct cgroup flags field */ enum { /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, /* * Clone the parent's configuration when creating a new child * cpuset cgroup. For historical reasons, this option can be * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, /* Control group has to be frozen. */ CGRP_FREEZE, /* Cgroup is frozen. */ CGRP_FROZEN, }; /* cgroup_root->flags */ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ /* * Consider namespaces as delegation boundaries. If this flag is * set, controller specific interface files in a namespace root * aren't writeable from inside the namespace. */ CGRP_ROOT_NS_DELEGATE = (1 << 3), /* * Reduce latencies on dynamic cgroup modifications such as task * migrations and controller on/offs by disabling percpu operation on * cgroup_threadgroup_rwsem. This makes hot path operations such as * forks and exits into the slow path and more expensive. * * The static usage pattern of creating a cgroup, enabling controllers, * and then seeding it with CLONE_INTO_CGROUP doesn't require write * locking cgroup_threadgroup_rwsem and thus doesn't benefit from * favordynmod. */ CGRP_ROOT_FAVOR_DYNMODS = (1 << 4), /* * Enable cpuset controller in v1 cgroup to use v2 behavior. */ CGRP_ROOT_CPUSET_V2_MODE = (1 << 16), /* * Enable legacy local memory.events. */ CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17), /* * Enable recursive subtree protection */ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), /* * Enable hugetlb accounting for the memory controller. */ CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), /* * Enable legacy local pids.events. 
*/ CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20), }; /* cftype->flags */ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NS_DELEGATABLE = (1 << 2), /* writeable beyond delegation boundaries */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ __CFTYPE_ADDED = (1 << 18), }; /* * cgroup_file is the handle for a file instance created in a cgroup which * is used, for example, to generate file changed notifications. This can * be obtained by setting cftype->file_offset. */ struct cgroup_file { /* do not access any fields from outside cgroup core */ struct kernfs_node *kn; unsigned long notified_at; struct timer_list notify_timer; }; /* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. * * Fields marked with "PI:" are public and immutable and may be accessed * directly without synchronization. */ struct cgroup_subsys_state { /* PI: the cgroup that this css is attached to */ struct cgroup *cgroup; /* PI: the cgroup subsystem that this css is attached to */ struct cgroup_subsys *ss; /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; /* * siblings list anchored at the parent's ->children * * linkage is protected by cgroup_mutex or RCU */ struct list_head sibling; struct list_head children; /* flush target list anchored at cgrp->rstat_css_list */ struct list_head rstat_css_node; /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). */ int id; unsigned int flags; /* * Monotonically increasing unique serial number which defines a * uniform order among all csses. It's guaranteed that all * ->children lists are in the ascending order of ->serial_nr and * used to allow interrupting and resuming iterations. */ u64 serial_nr; /* * Incremented by online self and children. Used to guarantee that * parents are not offlined before their children. */ atomic_t online_cnt; /* percpu_ref killing and RCU release */ struct work_struct destroy_work; struct rcu_work destroy_rwork; /* * PI: the parent css. Placed here for cache proximity to following * fields of the containing structure. */ struct cgroup_subsys_state *parent; /* * Keep track of total numbers of visible descendant CSSes. * The total number of dying CSSes is tracked in * css->cgroup->nr_dying_subsys[ssid]. * Protected by cgroup_mutex. */ int nr_descendants; }; /* * A css_set is a structure holding pointers to a set of * cgroup_subsys_state objects. This saves space in the task struct * object and speeds up fork()/exit(), since a single inc/dec and a * list_add()/del() can bump the reference count on the entire cgroup * set for a task. */ struct css_set { /* * Set of subsystem states, one for each subsystem. This array is * immutable after creation apart from the init_css_set during * subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; /* reference count */ refcount_t refcount; /* * For a domain cgroup, the following points to self. 
If threaded, * to the matching cset of the nearest domain ancestor. The * dom_cset provides access to the domain cgroup and its csses to * which domain level resource consumptions should be charged. */ struct css_set *dom_cset; /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; /* internal task count, protected by css_set_lock */ int nr_tasks; /* * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; struct list_head mg_tasks; struct list_head dying_tasks; /* all css_task_iters currently walking this cset */ struct list_head task_iters; /* * On the default hierarchy, ->subsys[ssid] may point to a css * attached to an ancestor instead of the cgroup this css_set is * associated with. The following node is anchored at * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to * iterate through all css's attached to a given cgroup. */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; /* all threaded csets whose ->dom_cset points to this cset */ struct list_head threaded_csets; struct list_head threaded_csets_node; /* * List running through all cgroup groups in the same hash * slot. Protected by css_set_lock */ struct hlist_node hlist; /* * List of cgrp_cset_links pointing at cgroups referenced from this * css_set. Protected by css_set_lock. */ struct list_head cgrp_links; /* * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ struct list_head mg_src_preload_node; struct list_head mg_dst_preload_node; struct list_head mg_node; /* * If this cset is acting as the source of migration the following * two fields are set. mg_src_cgrp and mg_dst_cgrp are * respectively the source and destination cgroups of the on-going * migration. mg_dst_cset is the destination cset the target tasks * on this cset should be migrated to. Protected by cgroup_mutex. */ struct cgroup *mg_src_cgrp; struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* dead and being drained, ignore for migration */ bool dead; /* For RCU-protected deletion */ struct rcu_head rcu_head; }; struct cgroup_base_stat { struct task_cputime cputime; #ifdef CONFIG_SCHED_CORE u64 forceidle_sum; #endif u64 ntime; }; /* * rstat - cgroup scalable recursive statistics. Accounting is done * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the * hierarchy on reads. * * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are * linked into the updated tree. On the following read, propagation only * considers and consumes the updated tree. This makes reading O(the * number of descendants which have been active since last read) instead of * O(the total number of descendants). * * This is important because there can be a lot of (draining) cgroups which * aren't active and stat may be read frequently. The combination can * become very expensive. By propagating selectively, increasing reading * frequency decreases the cost of each read. * * This struct hosts both the fields which implement the above - * updated_children and updated_next - and the fields which track basic * resource statistics on top of it - bsync, bstat and last_bstat. */ struct cgroup_rstat_cpu { /* * ->bsync protects ->bstat. These are the only fields which get * updated in the hot path. 
*/ struct u64_stats_sync bsync; struct cgroup_base_stat bstat; /* * Snapshots at the last reading. These are used to calculate the * deltas to propagate to the global counters. */ struct cgroup_base_stat last_bstat; /* * This field is used to record the cumulative per-cpu time of * the cgroup and its descendants. Currently it can be read via * eBPF/drgn etc, and we are still trying to determine how to * expose it in the cgroupfs interface. */ struct cgroup_base_stat subtree_bstat; /* * Snapshots at the last reading. These are used to calculate the * deltas to propagate to the per-cpu subtree_bstat. */ struct cgroup_base_stat last_subtree_bstat; /* * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. * * In addition to being more compact, singly-linked list pointing * to the cgroup makes it unnecessary for each per-cpu struct to * point back to the associated cgroup. * * Protected by per-cpu cgroup_rstat_cpu_lock. */ struct cgroup *updated_children; /* terminated by self cgroup */ struct cgroup *updated_next; /* NULL iff not on the list */ }; struct cgroup_freezer_state { /* Should the cgroup and its descendants be frozen. */ bool freeze; /* Should the cgroup actually be frozen? */ bool e_freeze; /* Fields below are protected by css_set_lock */ /* Number of frozen descendant cgroups */ int nr_frozen_descendants; /* * Number of tasks, which are counted as frozen: * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; }; struct cgroup { /* self css with NULL ->ss, points back to this cgroup */ struct cgroup_subsys_state self; unsigned long flags; /* "unsigned long" so bitops work */ /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with * ancestors[] can determine whether a given cgroup is a * descendant of another without traversing the hierarchy. */ int level; /* Maximum allowed descent tree depth */ int max_depth; /* * Keep track of total numbers of visible and dying descent cgroups. * Dying cgroups are cgroups which were deleted by a user, * but are still existing because someone else is holding a reference. * max_descendants is a maximum allowed number of descent cgroups. * * nr_descendants and nr_dying_descendants are protected * by cgroup_mutex and css_set_lock. It's fine to read them holding * any of cgroup_mutex and css_set_lock; for writing both locks * should be held. */ int nr_descendants; int nr_dying_descendants; int max_descendants; /* * Each non-empty css_set associated with this cgroup contributes * one to nr_populated_csets. The counter is zero iff this cgroup * doesn't have any tasks. * * All children which have non-zero nr_populated_csets and/or * nr_populated_children of their own contribute one to either * nr_populated_domain_children or nr_populated_threaded_children * depending on their type. Each counter is zero iff all cgroups * of the type in the subtree proper don't have any tasks. */ int nr_populated_csets; int nr_populated_domain_children; int nr_populated_threaded_children; int nr_threaded_children; /* # of live threaded child cgroups */ /* sequence number for cgroup.kill, serialized by css_set_lock. 
*/ unsigned int kill_seq; struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ struct cgroup_file events_file; /* handle for "cgroup.events" */ /* handles for "{cpu,memory,io,irq}.pressure" */ struct cgroup_file psi_files[NR_PSI_RESOURCES]; /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through * "cgroup.subtree_control" while ->subtree_ss_mask is the effective * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ u16 subtree_control; u16 subtree_ss_mask; u16 old_subtree_control; u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; /* * Keep track of total number of dying CSSes at and below this cgroup. * Protected by cgroup_mutex. */ int nr_dying_subsys[CGROUP_SUBSYS_COUNT]; struct cgroup_root *root; /* * List of cgrp_cset_links pointing at css_sets with tasks in this * cgroup. Protected by css_set_lock. */ struct list_head cset_links; /* * On the default hierarchy, a css_set for a cgroup with some * susbsys disabled will point to css's which are associated with * the closest ancestor which has the subsys enabled. The * following lists all css_sets which point to this cgroup's css * for the given subsystem. */ struct list_head e_csets[CGROUP_SUBSYS_COUNT]; /* * If !threaded, self. If threaded, it points to the nearest * domain ancestor. Inside a threaded subtree, cgroups are exempt * from process granularity and no-internal-task constraint. * Domain level resource consumptions which aren't tied to a * specific task are charged to the dom_cgrp. */ struct cgroup *dom_cgrp; struct cgroup *old_dom_cgrp; /* used while enabling threaded */ /* per-cpu recursive resource statistics */ struct cgroup_rstat_cpu __percpu *rstat_cpu; struct list_head rstat_css_list; /* * Add padding to separate the read mostly rstat_cpu and * rstat_css_list into a different cacheline from the following * rstat_flush_next and *bstat fields which can have frequent updates. */ CACHELINE_PADDING(_pad_); /* * A singly-linked list of cgroup structures to be rstat flushed. * This is a scratch field to be used exclusively by * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. */ struct cgroup *rstat_flush_next; /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; struct prev_cputime prev_cputime; /* for printing out cputime */ /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. */ struct list_head pidlists; struct mutex pidlist_mutex; /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; /* used to schedule release agent */ struct work_struct release_agent_work; /* used to track pressure stalls */ struct psi_group *psi; /* used to store eBPF programs */ struct cgroup_bpf bpf; /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; #ifdef CONFIG_BPF_SYSCALL struct bpf_local_storage __rcu *bpf_cgrp_storage; #endif /* All ancestors including self */ struct cgroup *ancestors[]; }; /* * A cgroup_root represents the root of a cgroup hierarchy, and may be * associated with a kernfs_root to form an active hierarchy. This is * internal to cgroup core. Don't access directly from controllers. 
*/ struct cgroup_root { struct kernfs_root *kf_root; /* The bitmask of subsystems attached to this hierarchy */ unsigned int subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; /* A list running through the active hierarchies */ struct list_head root_list; struct rcu_head rcu; /* Must be near the top */ /* * The root cgroup. The containing cgroup_root will be destroyed on its * release. cgrp->ancestors[0] will be used overflowing into the * following field. cgrp_ancestor_storage must immediately follow. */ struct cgroup cgrp; /* must follow cgrp for cgrp->ancestors[0], see above */ struct cgroup *cgrp_ancestor_storage; /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; /* Hierarchy-specific flags */ unsigned int flags; /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; /* The name for this hierarchy - may be empty */ char name[MAX_CGROUP_ROOT_NAMELEN]; }; /* * struct cftype: handler definitions for cgroup control files * * When reading/writing to a file: * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata * - the 'cftype' of the file is file->f_path.dentry->d_fsdata */ struct cftype { /* * By convention, the name should begin with the name of the * subsystem, followed by a period. Zero length string indicates * end of cftype array. */ char name[MAX_CFTYPE_NAME]; unsigned long private; /* * The maximum length of string, excluding trailing nul, that can * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. */ size_t max_write_len; /* CFTYPE_* flags */ unsigned int flags; /* * If non-zero, should contain the offset from the start of css to * a struct cgroup_file field. cgroup will record the handle of * the created file into it. The recorded handle can be used as * long as the containing css remains accessible. */ unsigned int file_offset; /* * Fields used for internal bookkeeping. Initialized automatically * during registration. */ struct cgroup_subsys *ss; /* NULL for cgroup core files */ struct list_head node; /* anchored at ss->cfts */ struct kernfs_ops *kf_ops; int (*open)(struct kernfs_open_file *of); void (*release)(struct kernfs_open_file *of); /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() */ u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); /* * read_s64() is a signed version of read_u64() */ s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); /* generic seq_file read interface */ int (*seq_show)(struct seq_file *sf, void *v); /* optional ops, implement all or none */ void *(*seq_start)(struct seq_file *sf, loff_t *ppos); void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); void (*seq_stop)(struct seq_file *sf, void *v); /* * write_u64() is a shortcut for the common case of accepting * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. */ int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, u64 val); /* * write_s64() is a signed version of write_u64() */ int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, s64 val); /* * write() is the generic write callback which maps directly to * kernfs write operation and overrides all other operations. * Maximum write size is determined by ->max_write_len. Use * of_css/cft() to access the associated css and cft. 
*/ ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); struct lock_class_key lockdep_key; }; /* * Control Group subsystem type. * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details */ struct cgroup_subsys { struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); int (*css_online)(struct cgroup_subsys_state *css); void (*css_offline)(struct cgroup_subsys_state *css); void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); int (*css_local_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); void (*post_attach)(void); int (*can_fork)(struct task_struct *task, struct css_set *cset); void (*cancel_fork)(struct task_struct *task, struct css_set *cset); void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); void (*release)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); bool early_init:1; /* * If %true, the controller, on the default hierarchy, doesn't show * up in "cgroup.controllers" or "cgroup.subtree_control", is * implicitly enabled on all cgroups on the default hierarchy, and * bypasses the "no internal process" constraint. This is for * utility type controllers which is transparent to userland. * * An implicit controller can be stolen from the default hierarchy * anytime and thus must be okay with offline csses from previous * hierarchies coexisting with csses for the current one. */ bool implicit_on_dfl:1; /* * If %true, the controller, supports threaded mode on the default * hierarchy. In a threaded subtree, both process granularity and * no-internal-process constraint are ignored and a threaded * controllers should be able to handle that. * * Note that as an implicit controller is automatically enabled on * all cgroups on the default hierarchy, it should also be * threaded. implicit && !threaded is not supported. */ bool threaded:1; /* the following two fields are initialized automatically during boot */ int id; const char *name; /* optional, initialized automatically during boot if not set */ const char *legacy_name; /* link to parent, protected by cgroup_lock() */ struct cgroup_root *root; /* idr for css->id */ struct idr css_idr; /* * List of cftypes. Each entry is the first entry of an array * terminated by zero length name. */ struct list_head cfts; /* * Base cftypes which are automatically registered. The two can * point to the same array. */ struct cftype *dfl_cftypes; /* for the default hierarchy */ struct cftype *legacy_cftypes; /* for the legacy hierarchies */ /* * A subsystem may depend on other subsystems. When such subsystem * is enabled on a cgroup, the depended-upon subsystems are enabled * together if available. Subsystems enabled due to dependency are * not visible to userland until explicitly enabled. The following * specifies the mask of subsystems that this one depends on. 
*/ unsigned int depends_on; }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; struct cgroup_of_peak { unsigned long value; struct list_head list; }; /** * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * * Allows cgroup operations to synchronize against threadgroup changes * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { percpu_down_read(&cgroup_threadgroup_rwsem); } /** * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * * Counterpart of cgroup_threadcgroup_change_begin(). */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { percpu_up_read(&cgroup_threadgroup_rwsem); } #else /* CONFIG_CGROUPS */ #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { might_sleep(); } static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ #ifdef CONFIG_SOCK_CGROUP_DATA /* * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains * per-socket cgroup information except for memcg association. * * On legacy hierarchies, net_prio and net_cls controllers directly * set attributes on each sock which can then be tested by the network * layer. On the default hierarchy, each sock is associated with the * cgroup it was created in and the networking layer can match the * cgroup directly. */ struct sock_cgroup_data { struct cgroup *cgroup; /* v2 */ #ifdef CONFIG_CGROUP_NET_CLASSID u32 classid; /* v1 */ #endif #ifdef CONFIG_CGROUP_NET_PRIO u16 prioidx; /* v1 */ #endif }; static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { #ifdef CONFIG_CGROUP_NET_PRIO return READ_ONCE(skcd->prioidx); #else return 1; #endif } static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { #ifdef CONFIG_CGROUP_NET_CLASSID return READ_ONCE(skcd->classid); #else return 0; #endif } static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { #ifdef CONFIG_CGROUP_NET_PRIO WRITE_ONCE(skcd->prioidx, prioidx); #endif } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { #ifdef CONFIG_CGROUP_NET_CLASSID WRITE_ONCE(skcd->classid, classid); #endif } #else /* CONFIG_SOCK_CGROUP_DATA */ struct sock_cgroup_data { }; #endif /* CONFIG_SOCK_CGROUP_DATA */ #endif /* _LINUX_CGROUP_DEFS_H */ |
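/*
 * Illustrative userspace sketch, not part of this header: it walks the
 * cgroup2 interface files backed by the structures above.  Assumes
 * cgroup2 is mounted at /sys/fs/cgroup and the caller has the privileges
 * to manage it; the "demo" cgroup name is a placeholder.
 *   - cgroup.subtree_control writes update ->subtree_control,
 *   - cgroup.procs writes migrate tasks (feeding ->nr_populated_csets),
 *   - cgroup.events is the file behind the events_file handle.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static void write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0)
		perror(path);
	if (fd >= 0)
		close(fd);
}

int main(void)
{
	char pid[32], buf[256];
	ssize_t n;
	int fd;

	/* A new directory allocates a struct cgroup and its csses. */
	if (mkdir("/sys/fs/cgroup/demo", 0755) && errno != EEXIST)
		perror("mkdir");

	/* Enable the cpu controller for the root's children. */
	write_str("/sys/fs/cgroup/cgroup.subtree_control", "+cpu");

	/* Migrate this process into the new cgroup. */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_str("/sys/fs/cgroup/demo/cgroup.procs", pid);

	/* Prints "populated 1" / "frozen 0" once the migration succeeds. */
	fd = open("/sys/fs/cgroup/demo/cgroup.events", O_RDONLY);
	if (fd >= 0) {
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			fputs(buf, stdout);
		}
		close(fd);
	}
	return 0;
}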
// SPDX-License-Identifier: GPL-2.0-only /* * LED support for the input layer * * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/input.h> #if IS_ENABLED(CONFIG_VT) #define VT_TRIGGER(_name) .trigger = _name #else #define VT_TRIGGER(_name) .trigger = NULL #endif #if IS_ENABLED(CONFIG_SND_CTL_LED) #define AUDIO_TRIGGER(_name) .trigger = _name #else #define AUDIO_TRIGGER(_name) .trigger = NULL #endif static const struct { const char *name; const char *trigger; } input_led_info[LED_CNT] = { [LED_NUML] = { "numlock", VT_TRIGGER("kbd-numlock") }, [LED_CAPSL] = { "capslock", VT_TRIGGER("kbd-capslock") }, [LED_SCROLLL] = { "scrolllock", VT_TRIGGER("kbd-scrolllock") }, [LED_COMPOSE] = { "compose" }, [LED_KANA] = { "kana", VT_TRIGGER("kbd-kanalock") }, [LED_SLEEP] = { "sleep" }, [LED_SUSPEND] = { "suspend" }, [LED_MUTE] = { "mute", AUDIO_TRIGGER("audio-mute") }, [LED_MISC] = { "misc" }, [LED_MAIL] = { "mail" }, [LED_CHARGING] = { "charging" }, }; struct input_led { struct led_classdev cdev; struct input_handle *handle; unsigned int code; /* One of LED_* constants */ }; struct input_leds { struct input_handle handle; unsigned int num_leds; struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) { struct input_led *led = container_of(cdev, struct input_led, cdev); struct input_dev *input = led->handle->dev; return test_bit(led->code, input->led) ?
cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2"); |
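/*
 * Illustrative userspace sketch, not part of this driver: once
 * input_leds_connect() has run for a keyboard, each LED is exposed as an
 * LED class device named "<dev_name>::<led name>" (the kasprintf() above),
 * e.g. /sys/class/leds/input3::capslock, where "input3" is a placeholder.
 * Writing brightness from sysfs lands in input_leds_brightness_set(),
 * which injects the state back into the input device as an EV_LED event.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/leds/input3::capslock/brightness";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	/* max_brightness is 1 for these LEDs, so "1" turns the LED on. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}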
// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 output functions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/net/ipv4/ip_output.c * * Changes: * A.N.Kuznetsov : arithmetics in fragmentation. * extension headers are implemented. * route changes now work. * ip6_forward does not confuse sniffers. * etc. * * H.
von Brand : Added missing #include <linux/string.h> * Imran Patel : frag id should be in NBO * Kazunori MIYAZAWA @USAGI * : add ip6_append_data and related functions * for datagram xmit */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/route.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/bpf-cgroup.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> #include <net/snmp.h> #include <net/gso.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/rawv6.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> #include <net/l3mdev.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { /* Make sure idev stays alive */ rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return -ENOMEM; } rcu_read_unlock(); } hdr = ipv6_hdr(skb); daddr = &hdr->daddr; if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing is not supported in any case. */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; } } if (lwtunnel_xmit_redirect(dst->lwtstate)) { int res = lwtunnel_xmit(skb); if (res != LWTUNNEL_XMIT_CONTINUE) return res; } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (IS_ERR_OR_NULL(neigh)) { if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; } } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); rcu_read_unlock(); return ret; } static int ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { struct sk_buff *segs, *nskb; netdev_features_t features; int ret = 0; /* Please see corresponding comment in ip_finish_output_gso * describing the cases where GSO segment length exceeds the * egress MTU. 
*/ features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } consume_skb(skb); skb_list_walk_safe(segs, segs, nskb) { int err; skb_mark_not_on_list(segs); /* Last GSO segment can be smaller than gso_size (and MTU). * Adding a fragment header would produce an "atomic fragment", * which is considered harmful (RFC-8021). Avoid that. */ err = segs->len > mtu ? ip6_fragment(net, sk, segs, ip6_finish_output2) : ip6_finish_output2(net, sk, segs); if (err && ret == 0) ret = err; } return ret; } static int ip6_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && !skb_gso_validate_network_len(skb, mtu)) return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); return ip6_finish_output2(net, sk, skb); } static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IP6CB(skb)->flags |= IP6SKB_REROUTED; return dst_output(net, sk, skb); } #endif mtu = ip6_skb_dst_mtu(skb); if (skb_is_gso(skb)) return ip6_finish_output_gso(net, sk, skb, mtu); if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { int ret; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, indev, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } EXPORT_SYMBOL(ip6_output); bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); return inet6_test_bit(AUTOFLOWLABEL, sk); } /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified * by calls to skb_set_owner_w() and ipv6_local_error(), * which are using proper atomic operations or spinlocks. 
*/ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) { struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { /* Make sure idev stays alive */ rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return -ENOBUFS; } rcu_read_unlock(); } if (opt) { seg_len += opt->opt_nflen + opt->opt_flen; if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, &fl6->saddr); } if (unlikely(seg_len > IPV6_MAXPLEN)) { hop_jumbo = skb_push(skb, hoplen); hop_jumbo->nexthdr = proto; hop_jumbo->hdrlen = 0; hop_jumbo->tlv_type = IPV6_TLV_JUMBO; hop_jumbo->tlv_len = 4; hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen); proto = IPPROTO_HOPOPTS; seg_len = 0; IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); /* * Fill in the IPv6 header */ if (np) hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; hdr->saddr = fl6->saddr; hdr->daddr = *first_hop; skb->protocol = htons(ETH_P_IPV6); skb->priority = priority; skb->mark = mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out((struct sock *)sk, skb); if (unlikely(!skb)) return 0; /* hooks should never assume socket lock is held. 
* we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, (struct sock *)sk, skb, NULL, dev, dst_output); } skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } EXPORT_SYMBOL(ip6_xmit); static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; struct sock *last = NULL; read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) rawv6_rcv(last, skb2); } last = sk; } } if (last) { rawv6_rcv(last, skb); read_unlock(&ip6_ra_lock); return 1; } read_unlock(&ip6_ra_lock); return 0; } static int ip6_forward_proxy_check(struct sk_buff *skb) { struct ipv6hdr *hdr = ipv6_hdr(skb); u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; if (ipv6_ext_hdr(nexthdr)) { offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); if (offset < 0) return 0; } else offset = sizeof(struct ipv6hdr); if (nexthdr == IPPROTO_ICMPV6) { struct icmp6hdr *icmp6; if (!pskb_may_pull(skb, (skb_network_header(skb) + offset + 1 - skb->data))) return 0; icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); switch (icmp6->icmp6_type) { case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: /* For reaction involving unicast neighbor discovery * message destined to the proxied address, pass it to * input function. */ return 1; default: break; } } /* * The proxying router can't forward traffic sent to a link-local * address, so signal the sender and discard the packet. This * behavior is clarified by the MIPv6 specification. 
*/ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { dst_link_failure(skb); return -1; } return 0; } static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); return 0; } #endif skb_clear_tstamp(skb); return dst_output(net, sk, skb); } static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { if (skb->len <= mtu) return false; /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; if (skb->ignore_df) return false; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; } int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) goto drop; if (unlikely(skb->sk)) goto drop; if (skb_warn_if_lro(skb)) goto drop; if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } skb_forward_csum(skb); /* * We DO NOT make any processing on * RA packets, pushing them to user level AS IS * without ane WARRANTY that application will be able * to interpret them. The reason is that we * cannot make anything clever here. * * We are not end-node, so that if packet contains * AH/ESP, we cannot make anything. * Defragmentation also would be mistake, RA packets * cannot be fragmented, because there is no warranty * that different fragments will go along one path. --ANK */ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { if (ip6_call_ra_chain(skb, ntohs(opt->ra))) return 0; } /* * check and decrement ttl */ if (hdr->hop_limit <= 1) { icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return -ETIMEDOUT; } /* XXX: idev->cnf.proxy_ndp? */ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the * function too. * * But that would be incorrect, as proxying is * not forwarding. The ip6_input function * will handle this packet locally, and it * depends on the hop limit being unchanged. * * One example is the NDP hop limit, that * always has to stay 255, but other would be * similar checks around RA packets, where the * user can even change the desired limit. */ return ip6_input(skb); } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); SKB_DR_SET(reason, XFRM_POLICY); goto drop; } dst = skb_dst(skb); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. 
*/ if (IP6CB(skb)->iif == dst->dev->ifindex && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct inet_peer *peer; struct rt6_info *rt; /* * incoming and outgoing devices are the same * send a redirect. */ rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) target = &rt->rt6i_gateway; else target = &hdr->daddr; rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); rcu_read_unlock(); } else { int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ if (addrtype == IPV6_ADDR_ANY || addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOT_NEIGHBOUR, 0); goto error; } } __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } hdr = ipv6_hdr(skb); /* Mangling hops number delayed to point after skb COW */ hdr->hop_limit--; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); SKB_DR_SET(reason, IP_INADDRERRORS); drop: kfree_skb_reason(skb, reason); return -EINVAL; } static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; skb_dst_drop(to); skb_dst_set(to, dst_clone(skb_dst(from))); to->dev = from->dev; to->mark = from->mark; skb_copy_hash(to, from); #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif nf_copy(to, from); skb_ext_copy(to, from); skb_copy_secmark(to, from); } int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_fraglist_iter *iter) { unsigned int first_len; struct frag_hdr *fh; /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!iter->tmp_hdr) return -ENOMEM; iter->frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); iter->offset = 0; iter->hlen = hlen; iter->frag_id = frag_id; iter->nexthdr = nexthdr; __skb_pull(skb, hlen); fh = __skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); return 0; } EXPORT_SYMBOL(ip6_fraglist_init); void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter) { struct sk_buff *frag = iter->frag; unsigned int hlen = iter->hlen; struct frag_hdr *fh; frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); fh = 
__skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); iter->offset += skb->len - hlen - sizeof(struct frag_hdr); fh->nexthdr = iter->nexthdr; fh->reserved = 0; fh->frag_off = htons(iter->offset); if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = iter->frag_id; ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); ip6_copy_metadata(frag, skb); } EXPORT_SYMBOL(ip6_fraglist_prepare); void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) { state->prevhdr = prevhdr; state->nexthdr = nexthdr; state->frag_id = frag_id; state->hlen = hlen; state->mtu = mtu; state->left = skb->len - hlen; /* Space per frame */ state->ptr = hlen; /* Where to start from */ state->hroom = hdr_room; state->troom = needed_tailroom; state->offset = 0; } EXPORT_SYMBOL(ip6_frag_init); struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) { u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; struct sk_buff *frag; struct frag_hdr *fh; unsigned int len; len = state->left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > state->mtu) len = state->mtu; /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < state->left) len &= ~7; /* Allocate buffer */ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + state->hroom + state->troom, GFP_ATOMIC); if (!frag) return ERR_PTR(-ENOMEM); /* * Set up data on packet */ ip6_copy_metadata(frag, skb); skb_reserve(frag, state->hroom); skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); frag->transport_header = (frag->network_header + state->hlen + sizeof(struct frag_hdr)); /* * Charge the memory for the fragment to any owner * it might possess */ if (skb->sk) skb_set_owner_w(frag, skb->sk); /* * Copy the packet header into the new buffer. */ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); fragnexthdr_offset = skb_network_header(frag); fragnexthdr_offset += prevhdr - skb_network_header(skb); *fragnexthdr_offset = NEXTHDR_FRAGMENT; /* * Build fragment header. */ fh->nexthdr = state->nexthdr; fh->reserved = 0; fh->identification = state->frag_id; /* * Copy a block of the IP datagram. */ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag), len)); state->left -= len; fh->frag_off = htons(state->offset); if (state->left > 0) fh->frag_off |= htons(IP6_MF); ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); state->ptr += len; state->offset += len; return frag; } EXPORT_SYMBOL(ip6_frag_next); int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 
inet6_sk(skb->sk) : NULL; u8 tstamp_type = skb->tstamp_type; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; int hroom, err = 0; __be32 frag_id; u8 *prevhdr, nexthdr = 0; err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) goto fail; hlen = err; nexthdr = *prevhdr; nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ if (unlikely(!skb->ignore_df && skb->len > mtu)) goto fail_toobig; if (IP6CB(skb)->frag_max_size) { if (IP6CB(skb)->frag_max_size > mtu) goto fail_toobig; /* don't send fragments larger than what we received */ mtu = IP6CB(skb)->frag_max_size; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; } if (np) { u32 frag_size = READ_ONCE(np->frag_size); if (frag_size && frag_size < mtu) mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) goto fail; prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); struct ip6_fraglist_iter iter; struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || skb_cloned(skb) || skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } skb->truesize -= frag->truesize; } err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter); if (err < 0) goto fail; /* We prevent @rt from being freed. */ rcu_read_lock(); for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) ip6_fraglist_prepare(skb, &iter); skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !iter.frag) break; skb = ip6_fraglist_next(&iter); } kfree(iter.tmp_hdr); if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); rcu_read_unlock(); return 0; } kfree_skb_list(iter.frag); IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); rcu_read_unlock(); return err; slow_path_clean: skb_walk_frags(skb, frag2) { if (frag2 == frag) break; frag2->sk = NULL; frag2->destructor = NULL; skb->truesize += frag2->truesize; } } slow_path: /* * Fragment the datagram. */ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, &state); /* * Keep copying data until we run out. */ while (state.left > 0) { frag = ip6_frag_next(skb, &state); if (IS_ERR(frag)) { err = PTR_ERR(frag); goto fail; } /* * Put this fragment into the sending queue. 
*/ skb_set_delivery_time(frag, tstamp, tstamp_type); err = output(net, sk, frag); if (err) goto fail; IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGCREATES); } IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); consume_skb(skb); return err; fail_toobig: icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; fail: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; } static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *fl_addr, const struct in6_addr *addr_cache) { return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, const struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt; if (!dst) goto out; if (dst->ops->family != AF_INET6) { dst_release(dst); return NULL; } rt = dst_rt6_info(dst); /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, * and MSG_DONTROUTE --ANK (980726) * * 1. ip6_rt_check(): If route was host route, * check that cached destination is current. * If it is network route, we still may * check its validity using saved pointer * to the last used address: daddr_cache. * We do not want to save whole address now, * (because main consumer of this service * is tcp, which has not this problem), * so that the last trick works only on connected * sockets. * 2. oif also should be the same. */ if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } out: return dst; } static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD struct neighbour *n; struct rt6_info *rt; #endif int err; int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the * ip6_route_output call _before_ ip6_route_get_saddr. * * In source specific routing (no src=any default route), * ip6_route_output will fail given src=any saddr, though, so * that's why we try it again later. */ if (ipv6_addr_any(&fl6->saddr)) { struct fib6_info *from; struct rt6_info *rt; *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : dst_rt6_info(*dst); rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); if (err) goto out_err_release; /* If we had an erroneous initial result, pretend it * never existed and let the SA-enabled version take * over. 
*/ if ((*dst)->error) { dst_release(*dst); *dst = NULL; } if (fl6->flowi6_oif) flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) goto out_err_release; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * Here if the dst entry we've looked up * has a neighbour entry that is in the INCOMPLETE * state and the src address from the flow is * marked as OPTIMISTIC, we release the found * dst entry and replace it instead with the * dst entry of the nexthop router */ rt = dst_rt6_info(*dst); rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock(); if (err) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); if (ifp) in6_ifa_put(ifp); if (redirect) { /* * We need to get the dst entry for the * default router instead */ dst_release(*dst); memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); *dst = ip6_route_output(net, sk, &fl_gw6); err = (*dst)->error; if (err) goto out_err_release; } } #endif if (ipv6_addr_v4mapped(&fl6->saddr) && !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { err = -EAFNOSUPPORT; goto out_err_release; } return 0; out_err_release: dst_release(*dst); *dst = NULL; if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; } /** * ip6_dst_lookup - perform route lookup on flow * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @dst: pointer to dst_entry * for result * @fl6: flow to lookup * * This function performs a route lookup on the given flow. * * It returns zero on success, or a standard errno code on error. */ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; return ip6_dst_lookup_tail(net, sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * * This function performs a route lookup on the given flow. * * It returns a valid dst pointer on success, or a pointer encoded * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; err = ip6_dst_lookup_tail(net, sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); /** * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @connected: whether @sk is connected or not * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * * In addition, for a connected socket, cache the dst in the socket * if the current cache is not valid. 
* * It returns a valid dst pointer on success, or a pointer encoded * error code. */ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); dst = ip6_sk_dst_check(sk, dst, fl6); if (dst) return dst; dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); if (connected && !IS_ERR(dst)) ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, gfp_t gfp) { return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (!skb) { /* first fragment, reserve header_len */ *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); } } static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so * ip6_cork_release() can put it down even in case of an error. */ cork->base.dst = &rt->dst; /* * setup for corking */ if (opt) { if (WARN_ON(v6_cork->opt)) return -EINVAL; nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); if (unlikely(!nopt)) return -ENOBUFS; nopt->tot_len = sizeof(*opt); nopt->opt_flen = opt->opt_flen; nopt->opt_nflen = opt->opt_nflen; nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); if (opt->dst0opt && !nopt->dst0opt) return -ENOBUFS; nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); if (opt->dst1opt && !nopt->dst1opt) return -ENOBUFS; nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); if (opt->hopopt && !nopt->hopopt) return -ENOBUFS; nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); if (opt->srcrt && !nopt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? 
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); frag_size = READ_ONCE(np->frag_size); if (frag_size && frag_size < mtu) mtu = frag_size; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; cork->base.priority = ipc6->sockc.priority; sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { cork->base.flags |= IPCORK_TS_OPT_ID; cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; } cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; return 0; } static int __ip6_append_data(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork_full, struct inet6_cork *v6_cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, unsigned int flags, struct ipcm6_cookie *ipc6) { struct sk_buff *skb, *skb_prev = NULL; struct inet_cork *cork = &cork_full->base; struct flowi6 *fl6 = &cork_full->fl.u.ip6; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; struct ubuf_info *uarg = NULL; int exthdrlen = 0; int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; bool zc = false; u32 tskey = 0; struct rt6_info *rt = dst_rt6_info(cork->dst); bool paged, hold_tskey = false, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; skb = skb_peek_tail(queue); if (!skb) { exthdrlen = opt ? opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } paged = !!cork->gso_size; mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + rt->rt6i_nfheader_len; if (mtu <= fragheaderlen || ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) goto emsgsize; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ if (headersize + transhdrlen > mtu) goto emsgsize; if (cork->length + length > mtu - headersize && ipc6->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_ICMPV6 || sk->sk_protocol == IPPROTO_RAW)) { ipv6_local_rxpmtu(sk, fl6, mtu - headersize + sizeof(struct ipv6hdr)); goto emsgsize; } if (ip6_sk_ignore_df(sk)) maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; else maxnonfragsize = mtu; if (cork->length + length > maxnonfragsize - headersize) { emsgsize: pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); return -EMSGSIZE; } /* CHECKSUM_PARTIAL only with no extension headers and when * we are not going to fragment */ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && length <= mtu - headersize && (!(flags & MSG_MORE) || cork->gso_size) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; if ((flags & MSG_ZEROCOPY) && length) { struct msghdr *msg = from; if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) return -EINVAL; /* Leave uarg NULL if can't zerocopy, callers should * be able to handle it. 
*/ if ((rt->dst.dev->features & NETIF_F_SG) && csummode == CHECKSUM_PARTIAL) { paged = true; zc = true; uarg = msg->msg_ubuf; } } else if (sock_flag(sk, SOCK_ZEROCOPY)) { uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; zc = true; } else { uarg_to_msgzc(uarg)->zerocopy = 0; skb_zcopy_set(skb, uarg, &extra_uref); } } } else if ((flags & MSG_SPLICE_PAGES) && length) { if (inet_test_bit(HDRINCL, sk)) return -EPERM; if (rt->dst.dev->features & NETIF_F_SG && getfrag == ip_generic_getfrag) /* We need an empty buffer to attach stuff to */ paged = true; else flags &= ~MSG_SPLICE_PAGES; } if (cork->tx_flags & SKBTX_ANY_TSTAMP && READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { if (cork->flags & IPCORK_TS_OPT_ID) { tskey = cork->ts_opt_id; } else { tskey = atomic_inc_return(&sk->sk_tskey) - 1; hold_tskey = true; } } /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. * Otherwise, we need to reserve fragment header and * fragment alignment (= 8-15 octects, in total). * * Note that we may need to "move" the data from the tail * of the buffer to the new fragment when we split * the message. * * FIXME: It may be fragmented into multiple chunks * at once if non-fragmentable extension headers * are too large. * --yoshfuji */ cork->length += length; if (!skb) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; unsigned int fraggap; unsigned int alloclen, alloc_extra; unsigned int pagedlen; alloc_new_skb: /* There's no room in the current skb */ if (skb) fraggap = skb->len - maxfraglen; else fraggap = 0; /* update mtu and maxfraglen if necessary */ if (!skb || !skb_prev) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, orig_mtu); skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; fraglen = datalen + fragheaderlen; pagedlen = 0; alloc_extra = hh_len; alloc_extra += dst_exthdrlen; alloc_extra += rt->dst.trailer_len; /* We just reserve space for fragment header. * Note: this may be overallocation if the message * (without MSG_MORE) fits into the MTU. */ alloc_extra += sizeof(struct frag_hdr); if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else if (!paged && (fraglen + alloc_extra < SKB_MAX_ALLOC || !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = fragheaderlen + transhdrlen; pagedlen = datalen - transhdrlen; } alloclen += alloc_extra; if (datalen != length + fraggap) { /* * this is not the last fragment, the trailer * space is regarded as data space. */ datalen += rt->dst.trailer_len; } fraglen = datalen + fragheaderlen; copy = datalen - transhdrlen - fraggap - pagedlen; /* [!] NOTE: copy may be negative if pagedlen>0 * because then the equation may reduces to -fraggap. 
*/ if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) { err = -EINVAL; goto error; } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; } if (!skb) goto error; /* * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); /* * Find where to start putting bytes */ data = skb_put(skb, fraglen - pagedlen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { skb->csum = skb_copy_and_csum_bits( skb_prev, maxfraglen, data + transhdrlen, fraggap); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } if (copy > 0 && INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } else if (flags & MSG_SPLICE_PAGES) { copy = 0; } offset += copy; length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; /* Only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; cork->tx_flags = 0; skb_shinfo(skb)->tskey = tskey; tskey = 0; skb_zcopy_set(skb, uarg, &extra_uref); if ((flags & MSG_CONFIRM) && !skb_prev) skb_set_dst_pending_confirm(skb, 1); /* * Put the packet on the pending queue */ if (!skb->destructor) { skb->destructor = sock_wfree; skb->sk = sk; wmem_alloc_delta += skb->truesize; } __skb_queue_tail(queue, skb); continue; } if (copy > length) copy = length; if (!(rt->dst.dev->features&NETIF_F_SG) && skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, skb_put(skb, copy), offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; } } else if (flags & MSG_SPLICE_PAGES) { struct msghdr *msg = from; err = -EIO; if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; err = skb_splice_from_iter(skb, &msg->msg_iter, copy, sk->sk_allocation); if (err < 0) goto error; copy = err; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) goto error; skb_zcopy_downgrade_managed(skb); if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { err = -EMSGSIZE; if (i == MAX_SKB_FRAGS) goto error; __skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, 0); skb_shinfo(skb)->nr_frags = ++i; get_page(pfrag->page); } copy = min_t(int, copy, pfrag->size - pfrag->offset); if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, page_address(pfrag->page) + pfrag->offset, offset, copy, skb->len, skb) < 0) goto error_efault; pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; wmem_alloc_delta += copy; } else { err = skb_zerocopy_iter_dgram(skb, from, copy); if (err < 0) goto error; } offset += copy; length -= copy; } if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); return 0; error_efault: err = -EFAULT; error: net_zcopy_put_abort(uarg, extra_uref); cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, 
&sk->sk_wmem_alloc); if (hold_tskey) atomic_dec(&sk->sk_tskey); return err; } int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); int exthdrlen; int err; if (flags&MSG_PROBE) return 0; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking */ dst_hold(&rt->dst); err = ip6_setup_cork(sk, &inet->cork, &np->cork, ipc6, rt); if (err) return err; inet->cork.fl.u.ip6 = *fl6; exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { transhdrlen = 0; } return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, from, length, transhdrlen, flags, ipc6); } EXPORT_SYMBOL_GPL(ip6_append_data); static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) { struct dst_entry *dst = cork->base.dst; cork->base.dst = NULL; skb_dst_set(skb, dst); } static void ip6_cork_release(struct inet_cork_full *cork, struct inet6_cork *v6_cork) { if (v6_cork->opt) { struct ipv6_txoptions *opt = v6_cork->opt; kfree(opt->dst0opt); kfree(opt->dst1opt); kfree(opt->hopopt); kfree(opt->srcrt); kfree(opt); v6_cork->opt = NULL; } if (cork->base.dst) { dst_release(cork->base.dst); cork->base.dst = NULL; } } struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork, struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; struct rt6_info *rt = dst_rt6_info(cork->base.dst); struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; skb = __skb_dequeue(queue); if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } /* Allow local fragmentation. 
*/ skb->ignore_df = ip6_sk_ignore_df(sk); __skb_pull(skb, skb_network_header_len(skb)); final_dst = &fl6->daddr; if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; skb->priority = cork->base.priority; skb->mark = cork->base.mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); else skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; if (sk->sk_socket->type == SOCK_RAW && !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH)) icmp6_type = fl6->fl6_icmp_type; else icmp6_type = icmp6_hdr(skb)->icmp6_type; ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } ip6_cork_release(cork, v6_cork); out: return skb; } int ip6_send_skb(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; rcu_read_lock(); err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); } rcu_read_unlock(); return err; } int ip6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; skb = ip6_finish_skb(sk); if (!skb) return 0; return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); static void __ip6_flush_pending_frames(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork, struct inet6_cork *v6_cork) { struct sk_buff *skb; while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } ip6_cork_release(cork, v6_cork); } void ip6_flush_pending_frames(struct sock *sk) { __ip6_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork) { struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); int err; if (flags & MSG_PROBE) { dst_release(&rt->dst); return NULL; } __skb_queue_head_init(&queue); cork->base.flags = 0; cork->base.addr = 0; cork->base.opt = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); if (err) { ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, &current->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6); if (err) { __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); } return __ip6_make_skb(sk, &queue, cork, &v6_cork); }
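/*
 * Editorial note (not part of the original source): a minimal usage sketch of
 * the corking API implemented above, in the spirit of udpv6_sendmsg(). It
 * assumes this file's existing includes and kernel context; the function name
 * example_sendmsg() and the UDP-sized transport header are illustrative
 * assumptions only, and route lookup, cmsg parsing and statistics are elided.
 */
static int example_sendmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   struct flowi6 *fl6, struct rt6_info *rt,
			   struct ipcm6_cookie *ipc6)
{
	int err;

	lock_sock(sk);
	/* Queue the payload; ip6_append_data() sets up the cork on the first
	 * call and keeps appending while the caller passes MSG_MORE.
	 */
	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
			      sizeof(struct udphdr), ipc6, fl6, rt,
			      msg->msg_flags);
	if (err)
		ip6_flush_pending_frames(sk);	/* drop whatever was queued */
	else if (!(msg->msg_flags & MSG_MORE))
		err = ip6_push_pending_frames(sk); /* build headers, transmit */
	release_sock(sk);
	return err;
}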
/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H #include <linux/tracepoint.h> #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> #include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm /* * Tracepoint for guest mode entry. */ TRACE_EVENT(kvm_entry, TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit), TP_ARGS(vcpu, force_immediate_exit), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned long, rip ) __field( bool, immediate_exit ) __field( u32, intr_info ) __field( u32, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->rip = kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, &__entry->error_code); ), TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s", __entry->vcpu_id, __entry->rip, __entry->intr_info, __entry->error_code, __entry->immediate_exit ? "[immediate exit]" : "") ); /* * Tracepoint for hypercall. */ TRACE_EVENT(kvm_hypercall, TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3), TP_ARGS(nr, a0, a1, a2, a3), TP_STRUCT__entry( __field( unsigned long, nr ) __field( unsigned long, a0 ) __field( unsigned long, a1 ) __field( unsigned long, a2 ) __field( unsigned long, a3 ) ), TP_fast_assign( __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; __entry->a2 = a2; __entry->a3 = a3; ), TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx", __entry->nr, __entry->a0, __entry->a1, __entry->a2, __entry->a3) ); /* * Tracepoint for hypercall.
*/ TRACE_EVENT(kvm_hv_hypercall, TP_PROTO(__u16 code, bool fast, __u16 var_cnt, __u16 rep_cnt, __u16 rep_idx, __u64 ingpa, __u64 outgpa), TP_ARGS(code, fast, var_cnt, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) __field( __u16, code ) __field( __u16, var_cnt ) __field( bool, fast ) ), TP_fast_assign( __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; __entry->code = code; __entry->var_cnt = var_cnt; __entry->fast = fast; ), TP_printk("code 0x%x %s var_cnt 0x%x rep_cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", __entry->code, __entry->fast ? "fast" : "slow", __entry->var_cnt, __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, __entry->outgpa) ); TRACE_EVENT(kvm_hv_hypercall_done, TP_PROTO(u64 result), TP_ARGS(result), TP_STRUCT__entry( __field(__u64, result) ), TP_fast_assign( __entry->result = result; ), TP_printk("result 0x%llx", __entry->result) ); /* * Tracepoint for Xen hypercall. */ TRACE_EVENT(kvm_xen_hypercall, TP_PROTO(u8 cpl, unsigned long nr, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5), TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5), TP_STRUCT__entry( __field(u8, cpl) __field(unsigned long, nr) __field(unsigned long, a0) __field(unsigned long, a1) __field(unsigned long, a2) __field(unsigned long, a3) __field(unsigned long, a4) __field(unsigned long, a5) ), TP_fast_assign( __entry->cpl = cpl; __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; __entry->a2 = a2; __entry->a3 = a3; __entry->a4 = a4; __entry->a5 = a5; ), TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", __entry->cpl, __entry->nr, __entry->a0, __entry->a1, __entry->a2, __entry->a3, __entry->a4, __entry->a5) ); /* * Tracepoint for PIO. */ #define KVM_PIO_IN 0 #define KVM_PIO_OUT 1 TRACE_EVENT(kvm_pio, TP_PROTO(unsigned int rw, unsigned int port, unsigned int size, unsigned int count, const void *data), TP_ARGS(rw, port, size, count, data), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, port ) __field( unsigned int, size ) __field( unsigned int, count ) __field( unsigned int, val ) ), TP_fast_assign( __entry->rw = rw; __entry->port = port; __entry->size = size; __entry->count = count; if (size == 1) __entry->val = *(unsigned char *)data; else if (size == 2) __entry->val = *(unsigned short *)data; else __entry->val = *(unsigned int *)data; ), TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s", __entry->rw ? "write" : "read", __entry->port, __entry->size, __entry->count, __entry->val, __entry->count > 1 ? "(...)" : "") ); /* * Tracepoint for fast mmio. */ TRACE_EVENT(kvm_fast_mmio, TP_PROTO(u64 gpa), TP_ARGS(gpa), TP_STRUCT__entry( __field(u64, gpa) ), TP_fast_assign( __entry->gpa = gpa; ), TP_printk("fast mmio at gpa 0x%llx", __entry->gpa) ); /* * Tracepoint for cpuid.
*/ TRACE_EVENT(kvm_cpuid, TP_PROTO(unsigned int function, unsigned int index, unsigned long rax, unsigned long rbx, unsigned long rcx, unsigned long rdx, bool found, bool used_max_basic), TP_ARGS(function, index, rax, rbx, rcx, rdx, found, used_max_basic), TP_STRUCT__entry( __field( unsigned int, function ) __field( unsigned int, index ) __field( unsigned long, rax ) __field( unsigned long, rbx ) __field( unsigned long, rcx ) __field( unsigned long, rdx ) __field( bool, found ) __field( bool, used_max_basic ) ), TP_fast_assign( __entry->function = function; __entry->index = index; __entry->rax = rax; __entry->rbx = rbx; __entry->rcx = rcx; __entry->rdx = rdx; __entry->found = found; __entry->used_max_basic = used_max_basic; ), TP_printk("func %x idx %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s%s", __entry->function, __entry->index, __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx, __entry->found ? "found" : "not found", __entry->used_max_basic ? ", used max basic" : "") ); #define AREG(x) { APIC_##x, "APIC_" #x } #define kvm_trace_symbol_apic \ AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \ AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \ AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \ AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \ AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \ AREG(ECTRL) /* * Tracepoint for apic access. */ TRACE_EVENT(kvm_apic, TP_PROTO(unsigned int rw, unsigned int reg, u64 val), TP_ARGS(rw, reg, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, reg ) __field( u64, val ) ), TP_fast_assign( __entry->rw = rw; __entry->reg = reg; __entry->val = val; ), TP_printk("apic_%s %s = 0x%llx", __entry->rw ? "write" : "read", __print_symbolic(__entry->reg, kvm_trace_symbol_apic), __entry->val) ); #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 #define kvm_print_exit_reason(exit_reason, isa) \ (isa == KVM_ISA_VMX) ? \ __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \ __print_symbolic(exit_reason, SVM_EXIT_REASONS), \ (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \ (isa == KVM_ISA_VMX) ? 
\ __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : "" #define TRACE_EVENT_KVM_EXIT(name) \ TRACE_EVENT(name, \ TP_PROTO(struct kvm_vcpu *vcpu, u32 isa), \ TP_ARGS(vcpu, isa), \ \ TP_STRUCT__entry( \ __field( unsigned int, exit_reason ) \ __field( unsigned long, guest_rip ) \ __field( u32, isa ) \ __field( u64, info1 ) \ __field( u64, info2 ) \ __field( u32, intr_info ) \ __field( u32, error_code ) \ __field( unsigned int, vcpu_id ) \ __field( u64, requests ) \ ), \ \ TP_fast_assign( \ __entry->guest_rip = kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ kvm_x86_call(get_exit_info)(vcpu, \ &__entry->exit_reason, \ &__entry->info1, \ &__entry->info2, \ &__entry->intr_info, \ &__entry->error_code); \ ), \ \ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \ "requests 0x%016llx", \ __entry->vcpu_id, \ kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \ __entry->guest_rip, __entry->info1, __entry->info2, \ __entry->intr_info, __entry->error_code, \ __entry->requests) \ ) /* * Tracepoint for kvm guest exit: */ TRACE_EVENT_KVM_EXIT(kvm_exit); /* * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_virq, TP_PROTO(unsigned int vector, bool soft, bool reinjected), TP_ARGS(vector, soft, reinjected), TP_STRUCT__entry( __field( unsigned int, vector ) __field( bool, soft ) __field( bool, reinjected ) ), TP_fast_assign( __entry->vector = vector; __entry->soft = soft; __entry->reinjected = reinjected; ), TP_printk("%s 0x%x%s", __entry->soft ? "Soft/INTn" : "IRQ", __entry->vector, __entry->reinjected ? " [reinjected]" : "") ); #define EXS(x) { x##_VECTOR, "#" #x } #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_exception, TP_PROTO(unsigned exception, bool has_error, unsigned error_code, bool reinjected), TP_ARGS(exception, has_error, error_code, reinjected), TP_STRUCT__entry( __field( u8, exception ) __field( u8, has_error ) __field( u32, error_code ) __field( bool, reinjected ) ), TP_fast_assign( __entry->exception = exception; __entry->has_error = has_error; __entry->error_code = error_code; __entry->reinjected = reinjected; ), TP_printk("%s%s%s%s%s", __print_symbolic(__entry->exception, kvm_trace_sym_exc), !__entry->has_error ? "" : " (", !__entry->has_error ? "" : __print_symbolic(__entry->error_code, { }), !__entry->has_error ? "" : ")", __entry->reinjected ? " [reinjected]" : "") ); /* * Tracepoint for page fault. */ TRACE_EVENT(kvm_page_fault, TP_PROTO(struct kvm_vcpu *vcpu, u64 fault_address, u64 error_code), TP_ARGS(vcpu, fault_address, error_code), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned long, guest_rip ) __field( u64, fault_address ) __field( u64, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->guest_rip = kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), TP_printk("vcpu %u rip 0x%lx address 0x%016llx error_code 0x%llx", __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, __entry->error_code) ); /* * Tracepoint for guest MSR access. 
*/ TRACE_EVENT(kvm_msr, TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), TP_ARGS(write, ecx, data, exception), TP_STRUCT__entry( __field( unsigned, write ) __field( u32, ecx ) __field( u64, data ) __field( u8, exception ) ), TP_fast_assign( __entry->write = write; __entry->ecx = ecx; __entry->data = data; __entry->exception = exception; ), TP_printk("msr_%s %x = 0x%llx%s", __entry->write ? "write" : "read", __entry->ecx, __entry->data, __entry->exception ? " (#GP)" : "") ); #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) /* * Tracepoint for guest CR access. */ TRACE_EVENT(kvm_cr, TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val), TP_ARGS(rw, cr, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, cr ) __field( unsigned long, val ) ), TP_fast_assign( __entry->rw = rw; __entry->cr = cr; __entry->val = val; ), TP_printk("cr_%s %x = 0x%lx", __entry->rw ? "write" : "read", __entry->cr, __entry->val) ); #define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val) #define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val) TRACE_EVENT(kvm_pic_set_irq, TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced), TP_ARGS(chip, pin, elcr, imr, coalesced), TP_STRUCT__entry( __field( __u8, chip ) __field( __u8, pin ) __field( __u8, elcr ) __field( __u8, imr ) __field( bool, coalesced ) ), TP_fast_assign( __entry->chip = chip; __entry->pin = pin; __entry->elcr = elcr; __entry->imr = imr; __entry->coalesced = coalesced; ), TP_printk("chip %u pin %u (%s%s)%s", __entry->chip, __entry->pin, (__entry->elcr & (1 << __entry->pin)) ? "level":"edge", (__entry->imr & (1 << __entry->pin)) ? "|masked":"", __entry->coalesced ? " (coalesced)" : "") ); #define kvm_apic_dst_shorthand \ {0x0, "dst"}, \ {0x1, "self"}, \ {0x2, "all"}, \ {0x3, "all-but-self"} TRACE_EVENT(kvm_apic_ipi, TP_PROTO(__u32 icr_low, __u32 dest_id), TP_ARGS(icr_low, dest_id), TP_STRUCT__entry( __field( __u32, icr_low ) __field( __u32, dest_id ) ), TP_fast_assign( __entry->icr_low = icr_low; __entry->dest_id = dest_id; ), TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)", __entry->dest_id, (u8)__entry->icr_low, __print_symbolic((__entry->icr_low >> 8 & 0x7), kvm_deliver_mode), (__entry->icr_low & (1<<11)) ? "logical" : "physical", (__entry->icr_low & (1<<14)) ? "assert" : "de-assert", (__entry->icr_low & (1<<15)) ? "level" : "edge", __print_symbolic((__entry->icr_low >> 18 & 0x3), kvm_apic_dst_shorthand)) ); TRACE_EVENT(kvm_apic_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? 
"level" : "edge") ); TRACE_EVENT(kvm_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); TRACE_EVENT(kvm_pv_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); /* * Tracepoint for nested VMRUN */ TRACE_EVENT(kvm_nested_vmenter, TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, __u32 event_inj, bool tdp_enabled, __u64 guest_tdp_pgd, __u64 guest_cr3, __u32 isa), TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, tdp_enabled, guest_tdp_pgd, guest_cr3, isa), TP_STRUCT__entry( __field( __u64, rip ) __field( __u64, vmcb ) __field( __u64, nested_rip ) __field( __u32, int_ctl ) __field( __u32, event_inj ) __field( bool, tdp_enabled ) __field( __u64, guest_pgd ) __field( __u32, isa ) ), TP_fast_assign( __entry->rip = rip; __entry->vmcb = vmcb; __entry->nested_rip = nested_rip; __entry->int_ctl = int_ctl; __entry->event_inj = event_inj; __entry->tdp_enabled = tdp_enabled; __entry->guest_pgd = tdp_enabled ? guest_tdp_pgd : guest_cr3; __entry->isa = isa; ), TP_printk("rip: 0x%016llx %s: 0x%016llx nested_rip: 0x%016llx " "int_ctl: 0x%08x event_inj: 0x%08x nested_%s=%s %s: 0x%016llx", __entry->rip, __entry->isa == KVM_ISA_VMX ? "vmcs" : "vmcb", __entry->vmcb, __entry->nested_rip, __entry->int_ctl, __entry->event_inj, __entry->isa == KVM_ISA_VMX ? "ept" : "npt", __entry->tdp_enabled ? "y" : "n", !__entry->tdp_enabled ? "guest_cr3" : __entry->isa == KVM_ISA_VMX ? 
"nested_eptp" : "nested_cr3", __entry->guest_pgd) ); TRACE_EVENT(kvm_nested_intercepts, TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u32 intercept1, __u32 intercept2, __u32 intercept3), TP_ARGS(cr_read, cr_write, exceptions, intercept1, intercept2, intercept3), TP_STRUCT__entry( __field( __u16, cr_read ) __field( __u16, cr_write ) __field( __u32, exceptions ) __field( __u32, intercept1 ) __field( __u32, intercept2 ) __field( __u32, intercept3 ) ), TP_fast_assign( __entry->cr_read = cr_read; __entry->cr_write = cr_write; __entry->exceptions = exceptions; __entry->intercept1 = intercept1; __entry->intercept2 = intercept2; __entry->intercept3 = intercept3; ), TP_printk("cr_read: %04x cr_write: %04x excp: %08x " "intercepts: %08x %08x %08x", __entry->cr_read, __entry->cr_write, __entry->exceptions, __entry->intercept1, __entry->intercept2, __entry->intercept3) ); /* * Tracepoint for #VMEXIT while nested */ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit); /* * Tracepoint for #VMEXIT reinjected to the guest */ TRACE_EVENT(kvm_nested_vmexit_inject, TP_PROTO(__u32 exit_code, __u64 exit_info1, __u64 exit_info2, __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), TP_ARGS(exit_code, exit_info1, exit_info2, exit_int_info, exit_int_info_err, isa), TP_STRUCT__entry( __field( __u32, exit_code ) __field( __u64, exit_info1 ) __field( __u64, exit_info2 ) __field( __u32, exit_int_info ) __field( __u32, exit_int_info_err ) __field( __u32, isa ) ), TP_fast_assign( __entry->exit_code = exit_code; __entry->exit_info1 = exit_info1; __entry->exit_info2 = exit_info2; __entry->exit_int_info = exit_int_info; __entry->exit_int_info_err = exit_int_info_err; __entry->isa = isa; ), TP_printk("reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", kvm_print_exit_reason(__entry->exit_code, __entry->isa), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_nested_intr_vmexit, TP_PROTO(__u64 rip), TP_ARGS(rip), TP_STRUCT__entry( __field( __u64, rip ) ), TP_fast_assign( __entry->rip = rip ), TP_printk("rip: 0x%016llx", __entry->rip) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( __field( __u64, rip ) __field( unsigned int, asid ) __field( __u64, address ) ), TP_fast_assign( __entry->rip = rip; __entry->asid = asid; __entry->address = address; ), TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_skinit, TP_PROTO(__u64 rip, __u32 slb), TP_ARGS(rip, slb), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, slb ) ), TP_fast_assign( __entry->rip = rip; __entry->slb = slb; ), TP_printk("rip: 0x%016llx slb: 0x%08x", __entry->rip, __entry->slb) ); #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) #define KVM_EMUL_INSN_F_CS_D (1 << 2) #define KVM_EMUL_INSN_F_CS_L (1 << 3) #define kvm_trace_symbol_emul_flags \ { 0, "real" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L, "prot64" } #define kei_decode_mode(mode) ({ \ u8 flags = 0xff; \ switch (mode) { \ case 
X86EMUL_MODE_REAL: \ flags = 0; \ break; \ case X86EMUL_MODE_VM86: \ flags = KVM_EMUL_INSN_F_EFL_VM; \ break; \ case X86EMUL_MODE_PROT16: \ flags = KVM_EMUL_INSN_F_CR0_PE; \ break; \ case X86EMUL_MODE_PROT32: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D; \ break; \ case X86EMUL_MODE_PROT64: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L; \ break; \ } \ flags; \ }) TRACE_EVENT(kvm_emulate_insn, TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), TP_ARGS(vcpu, failed), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, csbase ) __field( __u8, len ) __array( __u8, insn, 15 ) __field( __u8, flags ) __field( __u8, failed ) ), TP_fast_assign( __entry->csbase = kvm_x86_call(get_segment_base)(vcpu, VCPU_SREG_CS); __entry->len = vcpu->arch.emulate_ctxt->fetch.ptr - vcpu->arch.emulate_ctxt->fetch.data; __entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len; memcpy(__entry->insn, vcpu->arch.emulate_ctxt->fetch.data, 15); __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode); __entry->failed = failed; ), TP_printk("%x:%llx:%s (%s)%s", __entry->csbase, __entry->rip, __print_hex(__entry->insn, __entry->len), __print_symbolic(__entry->flags, kvm_trace_symbol_emul_flags), __entry->failed ? " failed" : "" ) ); #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) TRACE_EVENT( vcpu_match_mmio, TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match), TP_ARGS(gva, gpa, write, gpa_match), TP_STRUCT__entry( __field(gva_t, gva) __field(gpa_t, gpa) __field(bool, write) __field(bool, gpa_match) ), TP_fast_assign( __entry->gva = gva; __entry->gpa = gpa; __entry->write = write; __entry->gpa_match = gpa_match ), TP_printk("gva %#lx gpa %#llx %s %s", __entry->gva, __entry->gpa, __entry->write ? "Write" : "Read", __entry->gpa_match ? 
"GPA" : "GVA") ); TRACE_EVENT(kvm_write_tsc_offset, TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset, __u64 next_tsc_offset), TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u64, previous_tsc_offset ) __field( __u64, next_tsc_offset ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->previous_tsc_offset = previous_tsc_offset; __entry->next_tsc_offset = next_tsc_offset; ), TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id, __entry->previous_tsc_offset, __entry->next_tsc_offset) ); #ifdef CONFIG_X86_64 #define host_clocks \ {VDSO_CLOCKMODE_NONE, "none"}, \ {VDSO_CLOCKMODE_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), TP_ARGS(use_master_clock, host_clock, offset_matched), TP_STRUCT__entry( __field( bool, use_master_clock ) __field( unsigned int, host_clock ) __field( bool, offset_matched ) ), TP_fast_assign( __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; __entry->offset_matched = offset_matched; ), TP_printk("masterclock %d hostclock %s offsetmatched %u", __entry->use_master_clock, __print_symbolic(__entry->host_clock, host_clocks), __entry->offset_matched) ); TRACE_EVENT(kvm_track_tsc, TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched, unsigned int online_vcpus, bool use_master_clock, unsigned int host_clock), TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock, host_clock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, nr_vcpus_matched_tsc ) __field( unsigned int, online_vcpus ) __field( bool, use_master_clock ) __field( unsigned int, host_clock ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->nr_vcpus_matched_tsc = nr_matched; __entry->online_vcpus = online_vcpus; __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; ), TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u" " hostclock %s", __entry->vcpu_id, __entry->use_master_clock, __entry->nr_vcpus_matched_tsc, __entry->online_vcpus, __print_symbolic(__entry->host_clock, host_clocks)) ); #endif /* CONFIG_X86_64 */ /* * Tracepoint for PML full VMEXIT. */ TRACE_EVENT(kvm_pml_full, TP_PROTO(unsigned int vcpu_id), TP_ARGS(vcpu_id), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; ), TP_printk("vcpu %d: PML full", __entry->vcpu_id) ); TRACE_EVENT(kvm_ple_window_update, TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old), TP_ARGS(vcpu_id, new, old), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, new ) __field( unsigned int, old ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->new = new; __entry->old = old; ), TP_printk("vcpu %u old %u new %u (%s)", __entry->vcpu_id, __entry->old, __entry->new, __entry->old < __entry->new ? 
"growed" : "shrinked") ); TRACE_EVENT(kvm_pvclock_update, TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), TP_ARGS(vcpu_id, pvclock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u32, version ) __field( __u64, tsc_timestamp ) __field( __u64, system_time ) __field( __u32, tsc_to_system_mul ) __field( __s8, tsc_shift ) __field( __u8, flags ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->version = pvclock->version; __entry->tsc_timestamp = pvclock->tsc_timestamp; __entry->system_time = pvclock->system_time; __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; __entry->tsc_shift = pvclock->tsc_shift; __entry->flags = pvclock->flags; ), TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " "flags 0x%x }", __entry->vcpu_id, __entry->version, __entry->tsc_timestamp, __entry->system_time, __entry->tsc_to_system_mul, __entry->tsc_shift, __entry->flags) ); TRACE_EVENT(kvm_wait_lapic_expire, TP_PROTO(unsigned int vcpu_id, s64 delta), TP_ARGS(vcpu_id, delta), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( s64, delta ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->delta = delta; ), TP_printk("vcpu %u: delta %lld (%s)", __entry->vcpu_id, __entry->delta, __entry->delta < 0 ? "early" : "late") ); TRACE_EVENT(kvm_smm_transition, TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering), TP_ARGS(vcpu_id, smbase, entering), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( u64, smbase ) __field( bool, entering ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->smbase = smbase; __entry->entering = entering; ), TP_printk("vcpu %u: %s SMM, smbase 0x%llx", __entry->vcpu_id, __entry->entering ? "entering" : "leaving", __entry->smbase) ); /* * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC. */ TRACE_EVENT(kvm_pi_irte_update, TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, unsigned int gsi, unsigned int gvec, u64 pi_desc_addr, bool set), TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set), TP_STRUCT__entry( __field( unsigned int, host_irq ) __field( unsigned int, vcpu_id ) __field( unsigned int, gsi ) __field( unsigned int, gvec ) __field( u64, pi_desc_addr ) __field( bool, set ) ), TP_fast_assign( __entry->host_irq = host_irq; __entry->vcpu_id = vcpu_id; __entry->gsi = gsi; __entry->gvec = gvec; __entry->pi_desc_addr = pi_desc_addr; __entry->set = set; ), TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, " "gvec: 0x%x, pi_desc_addr: 0x%llx", __entry->set ? "enabled and being updated" : "disabled", __entry->host_irq, __entry->vcpu_id, __entry->gsi, __entry->gvec, __entry->pi_desc_addr) ); /* * Tracepoint for kvm_hv_notify_acked_sint. */ TRACE_EVENT(kvm_hv_notify_acked_sint, TP_PROTO(int vcpu_id, u32 sint), TP_ARGS(vcpu_id, sint), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; ), TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) ); /* * Tracepoint for synic_set_irq. 
*/ TRACE_EVENT(kvm_hv_synic_set_irq, TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), TP_ARGS(vcpu_id, sint, vector, ret), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; __entry->vector = vector; __entry->ret = ret; ), TP_printk("vcpu_id %d sint %u vector %d ret %d", __entry->vcpu_id, __entry->sint, __entry->vector, __entry->ret) ); /* * Tracepoint for kvm_hv_synic_send_eoi. */ TRACE_EVENT(kvm_hv_synic_send_eoi, TP_PROTO(int vcpu_id, int vector), TP_ARGS(vcpu_id, vector), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vector = vector; ), TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) ); /* * Tracepoint for synic_set_msr. */ TRACE_EVENT(kvm_hv_synic_set_msr, TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), TP_ARGS(vcpu_id, msr, data, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, msr) __field(u64, data) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->msr = msr; __entry->data = data; __entry->host = host ), TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) ); /* * Tracepoint for stimer_set_config. */ TRACE_EVENT(kvm_hv_stimer_set_config, TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), TP_ARGS(vcpu_id, timer_index, config, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, config) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->config = config; __entry->host = host; ), TP_printk("vcpu_id %d timer %d config 0x%llx host %d", __entry->vcpu_id, __entry->timer_index, __entry->config, __entry->host) ); /* * Tracepoint for stimer_set_count. */ TRACE_EVENT(kvm_hv_stimer_set_count, TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), TP_ARGS(vcpu_id, timer_index, count, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, count) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->count = count; __entry->host = host; ), TP_printk("vcpu_id %d timer %d count %llu host %d", __entry->vcpu_id, __entry->timer_index, __entry->count, __entry->host) ); /* * Tracepoint for stimer_start(periodic timer case). */ TRACE_EVENT(kvm_hv_stimer_start_periodic, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), TP_ARGS(vcpu_id, timer_index, time_now, exp_time), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, exp_time) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->exp_time = exp_time; ), TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->exp_time) ); /* * Tracepoint for stimer_start(one-shot timer case). 
*/ TRACE_EVENT(kvm_hv_stimer_start_one_shot, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), TP_ARGS(vcpu_id, timer_index, time_now, count), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, count) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->count = count; ), TP_printk("vcpu_id %d timer %d time_now %llu count %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->count) ); /* * Tracepoint for stimer_timer_callback. */ TRACE_EVENT(kvm_hv_stimer_callback, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); /* * Tracepoint for stimer_expiration. */ TRACE_EVENT(kvm_hv_stimer_expiration, TP_PROTO(int vcpu_id, int timer_index, int direct, int msg_send_result), TP_ARGS(vcpu_id, timer_index, direct, msg_send_result), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(int, direct) __field(int, msg_send_result) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->direct = direct; __entry->msg_send_result = msg_send_result; ), TP_printk("vcpu_id %d timer %d direct %d send result %d", __entry->vcpu_id, __entry->timer_index, __entry->direct, __entry->msg_send_result) ); /* * Tracepoint for stimer_cleanup. */ TRACE_EVENT(kvm_hv_stimer_cleanup, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); #define kvm_print_apicv_inhibit_reasons(inhibits) \ (inhibits), (inhibits) ? " " : "", \ (inhibits) ? __print_flags(inhibits, "|", APICV_INHIBIT_REASONS) : "" TRACE_EVENT(kvm_apicv_inhibit_changed, TP_PROTO(int reason, bool set, unsigned long inhibits), TP_ARGS(reason, set, inhibits), TP_STRUCT__entry( __field(int, reason) __field(bool, set) __field(unsigned long, inhibits) ), TP_fast_assign( __entry->reason = reason; __entry->set = set; __entry->inhibits = inhibits; ), TP_printk("%s reason=%u, inhibits=0x%lx%s%s", __entry->set ? "set" : "cleared", __entry->reason, kvm_print_apicv_inhibit_reasons(__entry->inhibits)) ); TRACE_EVENT(kvm_apicv_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? 
"level" : "edge") ); /* * Tracepoint for AMD AVIC */ TRACE_EVENT(kvm_avic_incomplete_ipi, TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index), TP_ARGS(vcpu, icrh, icrl, id, index), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, icrh) __field(u32, icrl) __field(u32, id) __field(u32, index) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->icrh = icrh; __entry->icrl = icrl; __entry->id = id; __entry->index = index; ), TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u", __entry->vcpu, __entry->icrh, __entry->icrl, __entry->id, __entry->index) ); TRACE_EVENT(kvm_avic_unaccelerated_access, TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec), TP_ARGS(vcpu, offset, ft, rw, vec), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, offset) __field(bool, ft) __field(bool, rw) __field(u32, vec) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->offset = offset; __entry->ft = ft; __entry->rw = rw; __entry->vec = vec; ), TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x", __entry->vcpu, __entry->offset, __print_symbolic(__entry->offset, kvm_trace_symbol_apic), __entry->ft ? "trap" : "fault", __entry->rw ? "write" : "read", __entry->vec) ); TRACE_EVENT(kvm_avic_ga_log, TP_PROTO(u32 vmid, u32 vcpuid), TP_ARGS(vmid, vcpuid), TP_STRUCT__entry( __field(u32, vmid) __field(u32, vcpuid) ), TP_fast_assign( __entry->vmid = vmid; __entry->vcpuid = vcpuid; ), TP_printk("vmid=%u, vcpuid=%u", __entry->vmid, __entry->vcpuid) ); TRACE_EVENT(kvm_avic_kick_vcpu_slowpath, TP_PROTO(u32 icrh, u32 icrl, u32 index), TP_ARGS(icrh, icrl, index), TP_STRUCT__entry( __field(u32, icrh) __field(u32, icrl) __field(u32, index) ), TP_fast_assign( __entry->icrh = icrh; __entry->icrl = icrl; __entry->index = index; ), TP_printk("icrh:icrl=%#08x:%08x, index=%u", __entry->icrh, __entry->icrl, __entry->index) ); TRACE_EVENT(kvm_avic_doorbell, TP_PROTO(u32 vcpuid, u32 apicid), TP_ARGS(vcpuid, apicid), TP_STRUCT__entry( __field(u32, vcpuid) __field(u32, apicid) ), TP_fast_assign( __entry->vcpuid = vcpuid; __entry->apicid = apicid; ), TP_printk("vcpuid=%u, apicid=%u", __entry->vcpuid, __entry->apicid) ); TRACE_EVENT(kvm_hv_timer_state, TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use), TP_ARGS(vcpu_id, hv_timer_in_use), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(unsigned int, hv_timer_in_use) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->hv_timer_in_use = hv_timer_in_use; ), TP_printk("vcpu_id %x hv_timer %x", __entry->vcpu_id, __entry->hv_timer_in_use) ); /* * Tracepoint for kvm_hv_flush_tlb. */ TRACE_EVENT(kvm_hv_flush_tlb, TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(processor_mask, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, processor_mask) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->processor_mask = processor_mask; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s", __entry->processor_mask, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoint for kvm_hv_flush_tlb_ex. 
*/ TRACE_EVENT(kvm_hv_flush_tlb_ex, TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, valid_bank_mask) __field(u64, format) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->valid_bank_mask = valid_bank_mask; __entry->format = format; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("valid_bank_mask 0x%llx format 0x%llx " "address_space 0x%llx flags 0x%llx %s", __entry->valid_bank_mask, __entry->format, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoints for kvm_hv_send_ipi. */ TRACE_EVENT(kvm_hv_send_ipi, TP_PROTO(u32 vector, u64 processor_mask), TP_ARGS(vector, processor_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, processor_mask) ), TP_fast_assign( __entry->vector = vector; __entry->processor_mask = processor_mask; ), TP_printk("vector %x processor_mask 0x%llx", __entry->vector, __entry->processor_mask) ); TRACE_EVENT(kvm_hv_send_ipi_ex, TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask), TP_ARGS(vector, format, valid_bank_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, format) __field(u64, valid_bank_mask) ), TP_fast_assign( __entry->vector = vector; __entry->format = format; __entry->valid_bank_mask = valid_bank_mask; ), TP_printk("vector %x format %llx valid_bank_mask 0x%llx", __entry->vector, __entry->format, __entry->valid_bank_mask) ); TRACE_EVENT(kvm_pv_tlb_flush, TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb), TP_ARGS(vcpu_id, need_flush_tlb), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( bool, need_flush_tlb ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->need_flush_tlb = need_flush_tlb; ), TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id, __entry->need_flush_tlb ? "true" : "false") ); /* * Tracepoint for failed nested VMX VM-Enter. */ TRACE_EVENT(kvm_nested_vmenter_failed, TP_PROTO(const char *msg, u32 err), TP_ARGS(msg, err), TP_STRUCT__entry( __string(msg, msg) __field(u32, err) ), TP_fast_assign( __assign_str(msg); __entry->err = err; ), TP_printk("%s%s", __get_str(msg), !__entry->err ? "" : __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); /* * Tracepoint for syndbg_set_msr. */ TRACE_EVENT(kvm_hv_syndbg_set_msr, TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), TP_ARGS(vcpu_id, vp_index, msr, data), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, vp_index) __field(u32, msr) __field(u64, data) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vp_index = vp_index; __entry->msr = msr; __entry->data = data; ), TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); /* * Tracepoint for syndbg_get_msr. 
*/ TRACE_EVENT(kvm_hv_syndbg_get_msr, TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), TP_ARGS(vcpu_id, vp_index, msr, data), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, vp_index) __field(u32, msr) __field(u64, data) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vp_index = vp_index; __entry->msr = msr; __entry->data = data; ), TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); /* * Tracepoint for the start of VMGEXIT processing */ TRACE_EVENT(kvm_vmgexit_enter, TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), TP_ARGS(vcpu_id, ghcb), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, exit_reason) __field(u64, info1) __field(u64, info2) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->exit_reason = ghcb->save.sw_exit_code; __entry->info1 = ghcb->save.sw_exit_info_1; __entry->info2 = ghcb->save.sw_exit_info_2; ), TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", __entry->vcpu_id, __entry->exit_reason, __entry->info1, __entry->info2) ); /* * Tracepoint for the end of VMGEXIT processing */ TRACE_EVENT(kvm_vmgexit_exit, TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), TP_ARGS(vcpu_id, ghcb), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, exit_reason) __field(u64, info1) __field(u64, info2) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->exit_reason = ghcb->save.sw_exit_code; __entry->info1 = ghcb->save.sw_exit_info_1; __entry->info2 = ghcb->save.sw_exit_info_2; ), TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", __entry->vcpu_id, __entry->exit_reason, __entry->info1, __entry->info2) ); /* * Tracepoint for the start of VMGEXIT MSR procotol processing */ TRACE_EVENT(kvm_vmgexit_msr_protocol_enter, TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa), TP_ARGS(vcpu_id, ghcb_gpa), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, ghcb_gpa) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->ghcb_gpa = ghcb_gpa; ), TP_printk("vcpu %u, ghcb_gpa %016llx", __entry->vcpu_id, __entry->ghcb_gpa) ); /* * Tracepoint for the end of VMGEXIT MSR procotol processing */ TRACE_EVENT(kvm_vmgexit_msr_protocol_exit, TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result), TP_ARGS(vcpu_id, ghcb_gpa, result), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, ghcb_gpa) __field(int, result) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->ghcb_gpa = ghcb_gpa; __entry->result = result; ), TP_printk("vcpu %u, ghcb_gpa %016llx, result %d", __entry->vcpu_id, __entry->ghcb_gpa, __entry->result) ); /* * Tracepoint for #NPFs due to RMP faults. 
*/ TRACE_EVENT(kvm_rmp_fault, TP_PROTO(struct kvm_vcpu *vcpu, u64 gpa, u64 pfn, u64 error_code, int rmp_level, int psmash_ret), TP_ARGS(vcpu, gpa, pfn, error_code, rmp_level, psmash_ret), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, gpa) __field(u64, pfn) __field(u64, error_code) __field(int, rmp_level) __field(int, psmash_ret) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->gpa = gpa; __entry->pfn = pfn; __entry->error_code = error_code; __entry->rmp_level = rmp_level; __entry->psmash_ret = psmash_ret; ), TP_printk("vcpu %u gpa %016llx pfn 0x%llx error_code 0x%llx rmp_level %d psmash_ret %d", __entry->vcpu_id, __entry->gpa, __entry->pfn, __entry->error_code, __entry->rmp_level, __entry->psmash_ret) ); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH ../../arch/x86/kvm #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace /* This part must be outside protection */ #include <trace/define_trace.h>
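/*
 * Editor's note: the block below is an illustrative sketch, not part of the
 * trace header above. Each TRACE_EVENT(name, ...) definition expands into a
 * trace_<name>() helper with the TP_PROTO() signature, which KVM calls at the
 * instrumented site; the events can then be enabled at runtime through
 * tracefs (e.g. the events/kvm/ directory). The kvm_msr read/write wrappers
 * defined earlier in this header would be used roughly like this (the
 * function name and argument values are made up for illustration):
 */
#if 0	/* illustration only */
static void example_trace_usage(struct kvm_vcpu *vcpu, u32 ecx, u64 data)
{
	/* emits the kvm_msr event with write=0 and exception=false */
	trace_kvm_msr_read(ecx, data);

	/* emits the kvm_page_fault event defined above */
	trace_kvm_page_fault(vcpu, 0, 0);
}
#endif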
// SPDX-License-Identifier: GPL-2.0 /* * io_uring opcode handling table */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/io_uring.h> #include <linux/io_uring/cmd.h> #include "io_uring.h" #include "opdef.h" #include "refs.h" #include "tctx.h" #include "sqpoll.h" #include "fdinfo.h" #include "kbuf.h" #include "rsrc.h" #include "xattr.h" #include "nop.h" #include "fs.h" #include "splice.h" #include "sync.h" #include "advise.h" #include "openclose.h" #include
"uring_cmd.h" #include "epoll.h" #include "statx.h" #include "net.h" #include "msg_ring.h" #include "timeout.h" #include "poll.h" #include "cancel.h" #include "rw.h" #include "waitid.h" #include "futex.h" #include "truncate.h" static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags) { WARN_ON_ONCE(1); return -ECANCELED; } static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb, const struct io_uring_sqe *sqe) { return -EOPNOTSUPP; } const struct io_issue_def io_issue_defs[] = { [IORING_OP_NOP] = { .audit_skip = 1, .iopoll = 1, .prep = io_nop_prep, .issue = io_nop, }, [IORING_OP_READV] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_readv, .issue = io_read, }, [IORING_OP_WRITEV] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .vectored = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_writev, .issue = io_write, }, [IORING_OP_FSYNC] = { .needs_file = 1, .audit_skip = 1, .prep = io_fsync_prep, .issue = io_fsync, }, [IORING_OP_READ_FIXED] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_read_fixed, .issue = io_read, }, [IORING_OP_WRITE_FIXED] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_write_fixed, .issue = io_write, }, [IORING_OP_POLL_ADD] = { .needs_file = 1, .unbound_nonreg_file = 1, .audit_skip = 1, .prep = io_poll_add_prep, .issue = io_poll_add, }, [IORING_OP_POLL_REMOVE] = { .audit_skip = 1, .prep = io_poll_remove_prep, .issue = io_poll_remove, }, [IORING_OP_SYNC_FILE_RANGE] = { .needs_file = 1, .audit_skip = 1, .prep = io_sfr_prep, .issue = io_sync_file_range, }, [IORING_OP_SENDMSG] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .ioprio = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_sendmsg_prep, .issue = io_sendmsg, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_RECVMSG] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .ioprio = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_recvmsg_prep, .issue = io_recvmsg, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_TIMEOUT] = { .audit_skip = 1, .async_size = sizeof(struct io_timeout_data), .prep = io_timeout_prep, .issue = io_timeout, }, [IORING_OP_TIMEOUT_REMOVE] = { /* used by timeout updates' prep() */ .audit_skip = 1, .prep = io_timeout_remove_prep, .issue = io_timeout_remove, }, [IORING_OP_ACCEPT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .poll_exclusive = 1, .ioprio = 1, /* used for flags */ #if defined(CONFIG_NET) .prep = io_accept_prep, .issue = io_accept, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_ASYNC_CANCEL] = { .audit_skip = 1, .prep = io_async_cancel_prep, .issue = io_async_cancel, }, [IORING_OP_LINK_TIMEOUT] = { .audit_skip = 1, .async_size = sizeof(struct io_timeout_data), .prep = io_link_timeout_prep, .issue = io_no_issue, }, [IORING_OP_CONNECT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, #if defined(CONFIG_NET) .async_size = 
sizeof(struct io_async_msghdr), .prep = io_connect_prep, .issue = io_connect, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_FALLOCATE] = { .needs_file = 1, .prep = io_fallocate_prep, .issue = io_fallocate, }, [IORING_OP_OPENAT] = { .prep = io_openat_prep, .issue = io_openat, }, [IORING_OP_CLOSE] = { .prep = io_close_prep, .issue = io_close, }, [IORING_OP_FILES_UPDATE] = { .audit_skip = 1, .iopoll = 1, .prep = io_files_update_prep, .issue = io_files_update, }, [IORING_OP_STATX] = { .audit_skip = 1, .prep = io_statx_prep, .issue = io_statx, }, [IORING_OP_READ] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_read, .issue = io_read, }, [IORING_OP_WRITE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .plug = 1, .audit_skip = 1, .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, .async_size = sizeof(struct io_async_rw), .prep = io_prep_write, .issue = io_write, }, [IORING_OP_FADVISE] = { .needs_file = 1, .audit_skip = 1, .prep = io_fadvise_prep, .issue = io_fadvise, }, [IORING_OP_MADVISE] = { .audit_skip = 1, .prep = io_madvise_prep, .issue = io_madvise, }, [IORING_OP_SEND] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .audit_skip = 1, .ioprio = 1, .buffer_select = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_sendmsg_prep, .issue = io_send, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_RECV] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .audit_skip = 1, .ioprio = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_recvmsg_prep, .issue = io_recv, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_OPENAT2] = { .prep = io_openat2_prep, .issue = io_openat2, }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, .audit_skip = 1, #if defined(CONFIG_EPOLL) .prep = io_epoll_ctl_prep, .issue = io_epoll_ctl, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_SPLICE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .audit_skip = 1, .prep = io_splice_prep, .issue = io_splice, }, [IORING_OP_PROVIDE_BUFFERS] = { .audit_skip = 1, .iopoll = 1, .prep = io_provide_buffers_prep, .issue = io_provide_buffers, }, [IORING_OP_REMOVE_BUFFERS] = { .audit_skip = 1, .iopoll = 1, .prep = io_remove_buffers_prep, .issue = io_remove_buffers, }, [IORING_OP_TEE] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, .audit_skip = 1, .prep = io_tee_prep, .issue = io_tee, }, [IORING_OP_SHUTDOWN] = { .needs_file = 1, #if defined(CONFIG_NET) .prep = io_shutdown_prep, .issue = io_shutdown, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_RENAMEAT] = { .prep = io_renameat_prep, .issue = io_renameat, }, [IORING_OP_UNLINKAT] = { .prep = io_unlinkat_prep, .issue = io_unlinkat, }, [IORING_OP_MKDIRAT] = { .prep = io_mkdirat_prep, .issue = io_mkdirat, }, [IORING_OP_SYMLINKAT] = { .prep = io_symlinkat_prep, .issue = io_symlinkat, }, [IORING_OP_LINKAT] = { .prep = io_linkat_prep, .issue = io_linkat, }, [IORING_OP_MSG_RING] = { .needs_file = 1, .iopoll = 1, .prep = io_msg_ring_prep, .issue = io_msg_ring, }, [IORING_OP_FSETXATTR] = { .needs_file = 1, .prep = io_fsetxattr_prep, .issue = io_fsetxattr, }, [IORING_OP_SETXATTR] = { .prep = io_setxattr_prep, .issue = io_setxattr, }, [IORING_OP_FGETXATTR] = { .needs_file = 1, .prep = io_fgetxattr_prep, .issue = io_fgetxattr, }, 
[IORING_OP_GETXATTR] = { .prep = io_getxattr_prep, .issue = io_getxattr, }, [IORING_OP_SOCKET] = { .audit_skip = 1, #if defined(CONFIG_NET) .prep = io_socket_prep, .issue = io_socket, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_URING_CMD] = { .needs_file = 1, .plug = 1, .iopoll = 1, .iopoll_queue = 1, .async_size = sizeof(struct io_uring_cmd_data), .prep = io_uring_cmd_prep, .issue = io_uring_cmd, }, [IORING_OP_SEND_ZC] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .audit_skip = 1, .ioprio = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_send_zc_prep, .issue = io_send_zc, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_SENDMSG_ZC] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .ioprio = 1, #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), .prep = io_send_zc_prep, .issue = io_sendmsg_zc, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_READ_MULTISHOT] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, .audit_skip = 1, .async_size = sizeof(struct io_async_rw), .prep = io_read_mshot_prep, .issue = io_read_mshot, }, [IORING_OP_WAITID] = { .async_size = sizeof(struct io_waitid_async), .prep = io_waitid_prep, .issue = io_waitid, }, [IORING_OP_FUTEX_WAIT] = { #if defined(CONFIG_FUTEX) .prep = io_futex_prep, .issue = io_futex_wait, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_FUTEX_WAKE] = { #if defined(CONFIG_FUTEX) .prep = io_futex_prep, .issue = io_futex_wake, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_FUTEX_WAITV] = { #if defined(CONFIG_FUTEX) .prep = io_futexv_prep, .issue = io_futexv_wait, #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_FIXED_FD_INSTALL] = { .needs_file = 1, .prep = io_install_fixed_fd_prep, .issue = io_install_fixed_fd, }, [IORING_OP_FTRUNCATE] = { .needs_file = 1, .hash_reg_file = 1, .prep = io_ftruncate_prep, .issue = io_ftruncate, }, [IORING_OP_BIND] = { #if defined(CONFIG_NET) .needs_file = 1, .prep = io_bind_prep, .issue = io_bind, .async_size = sizeof(struct io_async_msghdr), #else .prep = io_eopnotsupp_prep, #endif }, [IORING_OP_LISTEN] = { #if defined(CONFIG_NET) .needs_file = 1, .prep = io_listen_prep, .issue = io_listen, .async_size = sizeof(struct io_async_msghdr), #else .prep = io_eopnotsupp_prep, #endif }, }; const struct io_cold_def io_cold_defs[] = { [IORING_OP_NOP] = { .name = "NOP", }, [IORING_OP_READV] = { .name = "READV", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_WRITEV] = { .name = "WRITEV", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_FSYNC] = { .name = "FSYNC", }, [IORING_OP_READ_FIXED] = { .name = "READ_FIXED", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_WRITE_FIXED] = { .name = "WRITE_FIXED", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_POLL_ADD] = { .name = "POLL_ADD", }, [IORING_OP_POLL_REMOVE] = { .name = "POLL_REMOVE", }, [IORING_OP_SYNC_FILE_RANGE] = { .name = "SYNC_FILE_RANGE", }, [IORING_OP_SENDMSG] = { .name = "SENDMSG", #if defined(CONFIG_NET) .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_RECVMSG] = { .name = "RECVMSG", #if defined(CONFIG_NET) .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_TIMEOUT] = { .name = "TIMEOUT", }, [IORING_OP_TIMEOUT_REMOVE] = { .name = "TIMEOUT_REMOVE", }, [IORING_OP_ACCEPT] = { .name = "ACCEPT", }, [IORING_OP_ASYNC_CANCEL] = { .name = "ASYNC_CANCEL", }, 
[IORING_OP_LINK_TIMEOUT] = { .name = "LINK_TIMEOUT", }, [IORING_OP_CONNECT] = { .name = "CONNECT", }, [IORING_OP_FALLOCATE] = { .name = "FALLOCATE", }, [IORING_OP_OPENAT] = { .name = "OPENAT", .cleanup = io_open_cleanup, }, [IORING_OP_CLOSE] = { .name = "CLOSE", }, [IORING_OP_FILES_UPDATE] = { .name = "FILES_UPDATE", }, [IORING_OP_STATX] = { .name = "STATX", .cleanup = io_statx_cleanup, }, [IORING_OP_READ] = { .name = "READ", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_WRITE] = { .name = "WRITE", .cleanup = io_readv_writev_cleanup, .fail = io_rw_fail, }, [IORING_OP_FADVISE] = { .name = "FADVISE", }, [IORING_OP_MADVISE] = { .name = "MADVISE", }, [IORING_OP_SEND] = { .name = "SEND", #if defined(CONFIG_NET) .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_RECV] = { .name = "RECV", #if defined(CONFIG_NET) .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_OPENAT2] = { .name = "OPENAT2", .cleanup = io_open_cleanup, }, [IORING_OP_EPOLL_CTL] = { .name = "EPOLL", }, [IORING_OP_SPLICE] = { .name = "SPLICE", .cleanup = io_splice_cleanup, }, [IORING_OP_PROVIDE_BUFFERS] = { .name = "PROVIDE_BUFFERS", }, [IORING_OP_REMOVE_BUFFERS] = { .name = "REMOVE_BUFFERS", }, [IORING_OP_TEE] = { .name = "TEE", .cleanup = io_splice_cleanup, }, [IORING_OP_SHUTDOWN] = { .name = "SHUTDOWN", }, [IORING_OP_RENAMEAT] = { .name = "RENAMEAT", .cleanup = io_renameat_cleanup, }, [IORING_OP_UNLINKAT] = { .name = "UNLINKAT", .cleanup = io_unlinkat_cleanup, }, [IORING_OP_MKDIRAT] = { .name = "MKDIRAT", .cleanup = io_mkdirat_cleanup, }, [IORING_OP_SYMLINKAT] = { .name = "SYMLINKAT", .cleanup = io_link_cleanup, }, [IORING_OP_LINKAT] = { .name = "LINKAT", .cleanup = io_link_cleanup, }, [IORING_OP_MSG_RING] = { .name = "MSG_RING", .cleanup = io_msg_ring_cleanup, }, [IORING_OP_FSETXATTR] = { .name = "FSETXATTR", .cleanup = io_xattr_cleanup, }, [IORING_OP_SETXATTR] = { .name = "SETXATTR", .cleanup = io_xattr_cleanup, }, [IORING_OP_FGETXATTR] = { .name = "FGETXATTR", .cleanup = io_xattr_cleanup, }, [IORING_OP_GETXATTR] = { .name = "GETXATTR", .cleanup = io_xattr_cleanup, }, [IORING_OP_SOCKET] = { .name = "SOCKET", }, [IORING_OP_URING_CMD] = { .name = "URING_CMD", }, [IORING_OP_SEND_ZC] = { .name = "SEND_ZC", #if defined(CONFIG_NET) .cleanup = io_send_zc_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_SENDMSG_ZC] = { .name = "SENDMSG_ZC", #if defined(CONFIG_NET) .cleanup = io_send_zc_cleanup, .fail = io_sendrecv_fail, #endif }, [IORING_OP_READ_MULTISHOT] = { .name = "READ_MULTISHOT", .cleanup = io_readv_writev_cleanup, }, [IORING_OP_WAITID] = { .name = "WAITID", }, [IORING_OP_FUTEX_WAIT] = { .name = "FUTEX_WAIT", }, [IORING_OP_FUTEX_WAKE] = { .name = "FUTEX_WAKE", }, [IORING_OP_FUTEX_WAITV] = { .name = "FUTEX_WAITV", }, [IORING_OP_FIXED_FD_INSTALL] = { .name = "FIXED_FD_INSTALL", }, [IORING_OP_FTRUNCATE] = { .name = "FTRUNCATE", }, [IORING_OP_BIND] = { .name = "BIND", }, [IORING_OP_LISTEN] = { .name = "LISTEN", }, }; const char *io_uring_get_opcode(u8 opcode) { if (opcode < IORING_OP_LAST) return io_cold_defs[opcode].name; return "INVALID"; } bool io_uring_op_supported(u8 opcode) { if (opcode < IORING_OP_LAST && io_issue_defs[opcode].prep != io_eopnotsupp_prep) return true; return false; } void __init io_uring_optable_init(void) { int i; BUILD_BUG_ON(ARRAY_SIZE(io_cold_defs) != IORING_OP_LAST); BUILD_BUG_ON(ARRAY_SIZE(io_issue_defs) != IORING_OP_LAST); for (i = 0; i < ARRAY_SIZE(io_issue_defs); i++) { BUG_ON(!io_issue_defs[i].prep); 
if (io_issue_defs[i].prep != io_eopnotsupp_prep) BUG_ON(!io_issue_defs[i].issue); WARN_ON_ONCE(!io_cold_defs[i].name); } }
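/*
 * Editor's note: an illustrative sketch, not part of the opcode table file
 * above. Both tables are indexed by the SQE opcode: io_issue_defs[] carries
 * the hot-path flags plus the prep()/issue() handlers, while io_cold_defs[]
 * holds the name and the rarely used cleanup()/fail() callbacks. The helpers
 * defined above can be used like this (the probe function itself is made up
 * for illustration):
 */
#if 0	/* illustration only */
static void example_dump_opcode(u8 opcode)
{
	if (io_uring_op_supported(opcode))
		pr_info("opcode %u (%s) is supported\n",
			opcode, io_uring_get_opcode(opcode));
	else
		pr_info("opcode %u is not supported in this build\n", opcode);
}
#endif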
// SPDX-License-Identifier: GPL-2.0-only /* * vivid-vid-out.c - video output support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/videodev2.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-common.h> #include <media/v4l2-event.h> #include <media/v4l2-dv-timings.h> #include <media/v4l2-rect.h> #include "vivid-core.h" #include "vivid-vid-common.h" #include "vivid-kthread-out.h" #include "vivid-vid-out.h" static int vid_out_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); const struct vivid_fmt *vfmt = dev->fmt_out; unsigned planes = vfmt->buffers; unsigned h = dev->fmt_out_rect.height; unsigned int size = dev->bytesperline_out[0] * h + vfmt->data_offset[0]; unsigned p; for (p = vfmt->buffers; p < vfmt->planes; p++) size += dev->bytesperline_out[p] * h / vfmt->vdownsampling[p] + vfmt->data_offset[p]; if (dev->field_out == V4L2_FIELD_ALTERNATE) { /* * You cannot use write() with FIELD_ALTERNATE since the field * information (TOP/BOTTOM) cannot be passed to the kernel. */ if (vb2_fileio_is_active(vq)) return -EINVAL; } if (dev->queue_setup_error) { /* * Error injection: test what happens if queue_setup() returns * an error. */ dev->queue_setup_error = false; return -EINVAL; } if (*nplanes) { /* * Check if the number of requested planes match * the number of planes in the current format. You can't mix that. */ if (*nplanes != planes) return -EINVAL; if (sizes[0] < size) return -EINVAL; for (p = 1; p < planes; p++) { if (sizes[p] < dev->bytesperline_out[p] * h / vfmt->vdownsampling[p] + vfmt->data_offset[p]) return -EINVAL; } } else { for (p = 0; p < planes; p++) sizes[p] = p ?
dev->bytesperline_out[p] * h / vfmt->vdownsampling[p] + vfmt->data_offset[p] : size; } *nplanes = planes; dprintk(dev, 1, "%s: count=%u\n", __func__, *nbuffers); for (p = 0; p < planes; p++) dprintk(dev, 1, "%s: size[%u]=%u\n", __func__, p, sizes[p]); return 0; } static int vid_out_buf_out_validate(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); dprintk(dev, 1, "%s\n", __func__); if (dev->field_out != V4L2_FIELD_ALTERNATE) vbuf->field = dev->field_out; else if (vbuf->field != V4L2_FIELD_TOP && vbuf->field != V4L2_FIELD_BOTTOM) return -EINVAL; return 0; } static int vid_out_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); const struct vivid_fmt *vfmt = dev->fmt_out; unsigned int planes = vfmt->buffers; unsigned int h = dev->fmt_out_rect.height; unsigned int size = dev->bytesperline_out[0] * h; unsigned p; for (p = vfmt->buffers; p < vfmt->planes; p++) size += dev->bytesperline_out[p] * h / vfmt->vdownsampling[p]; dprintk(dev, 1, "%s\n", __func__); if (WARN_ON(NULL == dev->fmt_out)) return -EINVAL; if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. */ dev->buf_prepare_error = false; return -EINVAL; } for (p = 0; p < planes; p++) { if (p) size = dev->bytesperline_out[p] * h / vfmt->vdownsampling[p]; size += vb->planes[p].data_offset; if (vb2_get_plane_payload(vb, p) < size) { dprintk(dev, 1, "%s the payload is too small for plane %u (%lu < %u)\n", __func__, p, vb2_get_plane_payload(vb, p), size); return -EINVAL; } } return 0; } static void vid_out_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vid_out_active); spin_unlock(&dev->slock); } static int vid_out_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); int err; dev->vid_out_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_out(dev, &dev->vid_out_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vid_out_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vid_out_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_out(dev, &dev->vid_out_streaming); } static void vid_out_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vid_out); } const struct vb2_ops vivid_vid_out_qops = { .queue_setup = vid_out_queue_setup, .buf_out_validate = vid_out_buf_out_validate, .buf_prepare = vid_out_buf_prepare, .buf_queue = vid_out_buf_queue, .start_streaming = vid_out_start_streaming, .stop_streaming = vid_out_stop_streaming, .buf_request_complete = vid_out_buf_request_complete, }; /* * Called whenever the format has to be reset which can occur when * changing outputs, standard, timings, etc. 
*/ void vivid_update_format_out(struct vivid_dev *dev) { struct v4l2_bt_timings *bt = &dev->dv_timings_out.bt; unsigned size, p; u64 pixelclock; switch (dev->output_type[dev->output]) { case SVID: default: dev->field_out = dev->tv_field_out; dev->sink_rect.width = 720; if (dev->std_out & V4L2_STD_525_60) { dev->sink_rect.height = 480; dev->timeperframe_vid_out = (struct v4l2_fract) { 1001, 30000 }; dev->service_set_out = V4L2_SLICED_CAPTION_525; } else { dev->sink_rect.height = 576; dev->timeperframe_vid_out = (struct v4l2_fract) { 1000, 25000 }; dev->service_set_out = V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; } dev->colorspace_out = V4L2_COLORSPACE_SMPTE170M; break; case HDMI: dev->sink_rect.width = bt->width; dev->sink_rect.height = bt->height; size = V4L2_DV_BT_FRAME_WIDTH(bt) * V4L2_DV_BT_FRAME_HEIGHT(bt); if (can_reduce_fps(bt) && (bt->flags & V4L2_DV_FL_REDUCED_FPS)) pixelclock = div_u64(bt->pixelclock * 1000, 1001); else pixelclock = bt->pixelclock; dev->timeperframe_vid_out = (struct v4l2_fract) { size / 100, (u32)pixelclock / 100 }; if (bt->interlaced) dev->field_out = V4L2_FIELD_ALTERNATE; else dev->field_out = V4L2_FIELD_NONE; if (!dev->dvi_d_out && (bt->flags & V4L2_DV_FL_IS_CE_VIDEO)) { if (bt->width == 720 && bt->height <= 576) dev->colorspace_out = V4L2_COLORSPACE_SMPTE170M; else dev->colorspace_out = V4L2_COLORSPACE_REC709; } else { dev->colorspace_out = V4L2_COLORSPACE_SRGB; } break; } dev->xfer_func_out = V4L2_XFER_FUNC_DEFAULT; dev->ycbcr_enc_out = V4L2_YCBCR_ENC_DEFAULT; dev->hsv_enc_out = V4L2_HSV_ENC_180; dev->quantization_out = V4L2_QUANTIZATION_DEFAULT; dev->compose_out = dev->sink_rect; dev->compose_bounds_out = dev->sink_rect; dev->crop_out = dev->compose_out; if (V4L2_FIELD_HAS_T_OR_B(dev->field_out)) dev->crop_out.height /= 2; dev->fmt_out_rect = dev->crop_out; for (p = 0; p < dev->fmt_out->planes; p++) dev->bytesperline_out[p] = (dev->sink_rect.width * dev->fmt_out->bit_depth[p]) / 8; } /* Map the field to something that is valid for the current output */ static enum v4l2_field vivid_field_out(struct vivid_dev *dev, enum v4l2_field field) { if (vivid_is_svid_out(dev)) { switch (field) { case V4L2_FIELD_INTERLACED_TB: case V4L2_FIELD_INTERLACED_BT: case V4L2_FIELD_SEQ_TB: case V4L2_FIELD_SEQ_BT: case V4L2_FIELD_ALTERNATE: return field; case V4L2_FIELD_INTERLACED: default: return V4L2_FIELD_INTERLACED; } } if (vivid_is_hdmi_out(dev)) return dev->dv_timings_out.bt.interlaced ? V4L2_FIELD_ALTERNATE : V4L2_FIELD_NONE; return V4L2_FIELD_NONE; } static enum tpg_pixel_aspect vivid_get_pixel_aspect(const struct vivid_dev *dev) { if (vivid_is_svid_out(dev)) return (dev->std_out & V4L2_STD_525_60) ? TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; if (vivid_is_hdmi_out(dev) && dev->sink_rect.width == 720 && dev->sink_rect.height <= 576) return dev->sink_rect.height == 480 ? 
TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; return TPG_PIXEL_ASPECT_SQUARE; } int vivid_g_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; const struct vivid_fmt *fmt = dev->fmt_out; unsigned p; mp->width = dev->fmt_out_rect.width; mp->height = dev->fmt_out_rect.height; mp->field = dev->field_out; mp->pixelformat = fmt->fourcc; mp->colorspace = dev->colorspace_out; mp->xfer_func = dev->xfer_func_out; mp->ycbcr_enc = dev->ycbcr_enc_out; mp->quantization = dev->quantization_out; mp->num_planes = fmt->buffers; for (p = 0; p < mp->num_planes; p++) { mp->plane_fmt[p].bytesperline = dev->bytesperline_out[p]; mp->plane_fmt[p].sizeimage = mp->plane_fmt[p].bytesperline * mp->height / fmt->vdownsampling[p] + fmt->data_offset[p]; } for (p = fmt->buffers; p < fmt->planes; p++) { unsigned stride = dev->bytesperline_out[p]; mp->plane_fmt[0].sizeimage += (stride * mp->height) / fmt->vdownsampling[p]; } return 0; } int vivid_try_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_bt_timings *bt = &dev->dv_timings_out.bt; struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct v4l2_plane_pix_format *pfmt = mp->plane_fmt; const struct vivid_fmt *fmt; unsigned bytesperline, max_bpl; unsigned factor = 1; unsigned w, h; unsigned p; fmt = vivid_get_format(dev, mp->pixelformat); if (!fmt) { dprintk(dev, 1, "Fourcc format (0x%08x) unknown.\n", mp->pixelformat); mp->pixelformat = V4L2_PIX_FMT_YUYV; fmt = vivid_get_format(dev, mp->pixelformat); } mp->field = vivid_field_out(dev, mp->field); if (vivid_is_svid_out(dev)) { w = 720; h = (dev->std_out & V4L2_STD_525_60) ? 480 : 576; } else { w = dev->sink_rect.width; h = dev->sink_rect.height; } if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; if (!dev->has_scaler_out && !dev->has_crop_out && !dev->has_compose_out) { mp->width = w; mp->height = h / factor; } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height * factor }; v4l2_rect_set_min_size(&r, &vivid_min_rect); v4l2_rect_set_max_size(&r, &vivid_max_rect); if (dev->has_scaler_out && !dev->has_crop_out) { struct v4l2_rect max_r = { 0, 0, MAX_ZOOM * w, MAX_ZOOM * h }; v4l2_rect_set_max_size(&r, &max_r); } else if (!dev->has_scaler_out && dev->has_compose_out && !dev->has_crop_out) { v4l2_rect_set_max_size(&r, &dev->sink_rect); } else if (!dev->has_scaler_out && !dev->has_compose_out) { v4l2_rect_set_min_size(&r, &dev->sink_rect); } mp->width = r.width; mp->height = r.height / factor; } /* This driver supports custom bytesperline values */ mp->num_planes = fmt->buffers; for (p = 0; p < fmt->buffers; p++) { /* Calculate the minimum supported bytesperline value */ bytesperline = (mp->width * fmt->bit_depth[p]) >> 3; /* Calculate the maximum supported bytesperline value */ max_bpl = (MAX_ZOOM * MAX_WIDTH * fmt->bit_depth[p]) >> 3; if (pfmt[p].bytesperline > max_bpl) pfmt[p].bytesperline = max_bpl; if (pfmt[p].bytesperline < bytesperline) pfmt[p].bytesperline = bytesperline; pfmt[p].sizeimage = (pfmt[p].bytesperline * mp->height) / fmt->vdownsampling[p] + fmt->data_offset[p]; memset(pfmt[p].reserved, 0, sizeof(pfmt[p].reserved)); } for (p = fmt->buffers; p < fmt->planes; p++) pfmt[0].sizeimage += (pfmt[0].bytesperline * mp->height * (fmt->bit_depth[p] / fmt->vdownsampling[p])) / (fmt->bit_depth[0] / fmt->vdownsampling[0]); mp->xfer_func = V4L2_XFER_FUNC_DEFAULT; mp->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; mp->quantization = 
V4L2_QUANTIZATION_DEFAULT; if (vivid_is_svid_out(dev)) { mp->colorspace = V4L2_COLORSPACE_SMPTE170M; } else if (dev->dvi_d_out || !(bt->flags & V4L2_DV_FL_IS_CE_VIDEO)) { mp->colorspace = V4L2_COLORSPACE_SRGB; if (dev->dvi_d_out) mp->quantization = V4L2_QUANTIZATION_LIM_RANGE; } else if (bt->width == 720 && bt->height <= 576) { mp->colorspace = V4L2_COLORSPACE_SMPTE170M; } else if (mp->colorspace != V4L2_COLORSPACE_SMPTE170M && mp->colorspace != V4L2_COLORSPACE_REC709 && mp->colorspace != V4L2_COLORSPACE_OPRGB && mp->colorspace != V4L2_COLORSPACE_BT2020 && mp->colorspace != V4L2_COLORSPACE_SRGB) { mp->colorspace = V4L2_COLORSPACE_REC709; } memset(mp->reserved, 0, sizeof(mp->reserved)); return 0; } int vivid_s_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_out; struct v4l2_rect *compose = &dev->compose_out; struct vb2_queue *q = &dev->vb_vid_out_q; int ret = vivid_try_fmt_vid_out(file, priv, f); unsigned factor = 1; unsigned p; if (ret < 0) return ret; if (vb2_is_busy(q) && (vivid_is_svid_out(dev) || mp->width != dev->fmt_out_rect.width || mp->height != dev->fmt_out_rect.height || mp->pixelformat != dev->fmt_out->fourcc || mp->field != dev->field_out)) { dprintk(dev, 1, "%s device busy\n", __func__); return -EBUSY; } /* * Allow for changing the colorspace on the fly. Useful for testing * purposes, and it is something that HDMI transmitters are able * to do. */ if (vb2_is_busy(q)) goto set_colorspace; dev->fmt_out = vivid_get_format(dev, mp->pixelformat); if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; if (dev->has_scaler_out || dev->has_crop_out || dev->has_compose_out) { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; if (dev->has_scaler_out) { if (dev->has_crop_out) v4l2_rect_map_inside(crop, &r); else *crop = r; if (dev->has_compose_out && !dev->has_crop_out) { struct v4l2_rect min_r = { 0, 0, r.width / MAX_ZOOM, factor * r.height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, r.width * MAX_ZOOM, factor * r.height * MAX_ZOOM }; v4l2_rect_set_min_size(compose, &min_r); v4l2_rect_set_max_size(compose, &max_r); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); } else if (dev->has_compose_out) { struct v4l2_rect min_r = { 0, 0, crop->width / MAX_ZOOM, factor * crop->height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, crop->width * MAX_ZOOM, factor * crop->height * MAX_ZOOM }; v4l2_rect_set_min_size(compose, &min_r); v4l2_rect_set_max_size(compose, &max_r); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); } } else if (dev->has_compose_out && !dev->has_crop_out) { v4l2_rect_set_size_to(crop, &r); r.height *= factor; v4l2_rect_set_size_to(compose, &r); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); } else if (!dev->has_compose_out) { v4l2_rect_map_inside(crop, &r); r.height /= factor; v4l2_rect_set_size_to(compose, &r); } else { r.height *= factor; v4l2_rect_set_max_size(compose, &r); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); crop->top *= factor; crop->height *= factor; v4l2_rect_set_size_to(crop, compose); v4l2_rect_map_inside(crop, &r); crop->top /= factor; crop->height /= factor; } } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; v4l2_rect_set_size_to(crop, &r); r.height /= factor; v4l2_rect_set_size_to(compose, &r); } dev->fmt_out_rect.width = mp->width; dev->fmt_out_rect.height = mp->height; for (p = 0; p < mp->num_planes; p++) dev->bytesperline_out[p] = 
mp->plane_fmt[p].bytesperline; for (p = dev->fmt_out->buffers; p < dev->fmt_out->planes; p++) dev->bytesperline_out[p] = (dev->bytesperline_out[0] * dev->fmt_out->bit_depth[p]) / dev->fmt_out->bit_depth[0]; dev->field_out = mp->field; if (vivid_is_svid_out(dev)) dev->tv_field_out = mp->field; set_colorspace: dev->colorspace_out = mp->colorspace; dev->xfer_func_out = mp->xfer_func; dev->ycbcr_enc_out = mp->ycbcr_enc; dev->quantization_out = mp->quantization; struct vivid_dev *in_dev = vivid_output_is_connected_to(dev); if (in_dev) { vivid_send_source_change(in_dev, SVID); vivid_send_source_change(in_dev, HDMI); } return 0; } int vidioc_g_fmt_vid_out_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_g_fmt_vid_out(file, priv, f); } int vidioc_try_fmt_vid_out_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_try_fmt_vid_out(file, priv, f); } int vidioc_s_fmt_vid_out_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_s_fmt_vid_out(file, priv, f); } int vidioc_g_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_g_fmt_vid_out); } int vidioc_try_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_try_fmt_vid_out); } int vidioc_s_fmt_vid_out(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_s_fmt_vid_out); } int vivid_vid_out_g_selection(struct file *file, void *priv, struct v4l2_selection *sel) { struct vivid_dev *dev = video_drvdata(file); if (!dev->has_crop_out && !dev->has_compose_out) return -ENOTTY; if (sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) return -EINVAL; sel->r.left = sel->r.top = 0; switch (sel->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_out) return -EINVAL; sel->r = dev->crop_out; break; case V4L2_SEL_TGT_CROP_DEFAULT: if (!dev->has_crop_out) return -EINVAL; sel->r = dev->fmt_out_rect; break; case V4L2_SEL_TGT_CROP_BOUNDS: if (!dev->has_crop_out) return -EINVAL; sel->r = vivid_max_rect; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_out) return -EINVAL; sel->r = dev->compose_out; break; case V4L2_SEL_TGT_COMPOSE_DEFAULT: case V4L2_SEL_TGT_COMPOSE_BOUNDS: if (!dev->has_compose_out) return -EINVAL; sel->r = dev->sink_rect; break; default: return -EINVAL; } return 0; } int vivid_vid_out_s_selection(struct file *file, void *fh, struct v4l2_selection *s) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_out; struct v4l2_rect *compose = &dev->compose_out; unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_out) ? 
2 : 1; int ret; if (!dev->has_crop_out && !dev->has_compose_out) return -ENOTTY; if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) return -EINVAL; switch (s->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_out) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->fmt_out_rect); if (dev->has_scaler_out) { struct v4l2_rect max_rect = { 0, 0, dev->sink_rect.width * MAX_ZOOM, (dev->sink_rect.height / factor) * MAX_ZOOM }; v4l2_rect_set_max_size(&s->r, &max_rect); if (dev->has_compose_out) { struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, (s->r.height * factor) / MAX_ZOOM }; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, (s->r.height * factor) * MAX_ZOOM }; v4l2_rect_set_min_size(compose, &min_rect); v4l2_rect_set_max_size(compose, &max_rect); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); } } else if (dev->has_compose_out) { s->r.top *= factor; s->r.height *= factor; v4l2_rect_set_max_size(&s->r, &dev->sink_rect); v4l2_rect_set_size_to(compose, &s->r); v4l2_rect_map_inside(compose, &dev->compose_bounds_out); s->r.top /= factor; s->r.height /= factor; } else { v4l2_rect_set_size_to(&s->r, &dev->sink_rect); s->r.height /= factor; } v4l2_rect_map_inside(&s->r, &dev->fmt_out_rect); *crop = s->r; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_out) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->sink_rect); v4l2_rect_map_inside(&s->r, &dev->compose_bounds_out); s->r.top /= factor; s->r.height /= factor; if (dev->has_scaler_out) { struct v4l2_rect fmt = dev->fmt_out_rect; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, s->r.height * MAX_ZOOM }; struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, s->r.height / MAX_ZOOM }; v4l2_rect_set_min_size(&fmt, &min_rect); if (!dev->has_crop_out) v4l2_rect_set_max_size(&fmt, &max_rect); if (!v4l2_rect_same_size(&dev->fmt_out_rect, &fmt) && vb2_is_busy(&dev->vb_vid_out_q)) return -EBUSY; if (dev->has_crop_out) { v4l2_rect_set_min_size(crop, &min_rect); v4l2_rect_set_max_size(crop, &max_rect); } dev->fmt_out_rect = fmt; } else if (dev->has_crop_out) { struct v4l2_rect fmt = dev->fmt_out_rect; v4l2_rect_set_min_size(&fmt, &s->r); if (!v4l2_rect_same_size(&dev->fmt_out_rect, &fmt) && vb2_is_busy(&dev->vb_vid_out_q)) return -EBUSY; dev->fmt_out_rect = fmt; v4l2_rect_set_size_to(crop, &s->r); v4l2_rect_map_inside(crop, &dev->fmt_out_rect); } else { if (!v4l2_rect_same_size(&s->r, &dev->fmt_out_rect) && vb2_is_busy(&dev->vb_vid_out_q)) return -EBUSY; v4l2_rect_set_size_to(&dev->fmt_out_rect, &s->r); v4l2_rect_set_size_to(crop, &s->r); crop->height /= factor; v4l2_rect_map_inside(crop, &dev->fmt_out_rect); } s->r.top *= factor; s->r.height *= factor; *compose = s->r; break; default: return -EINVAL; } return 0; } int vivid_vid_out_g_pixelaspect(struct file *file, void *priv, int type, struct v4l2_fract *f) { struct vivid_dev *dev = video_drvdata(file); if (type != V4L2_BUF_TYPE_VIDEO_OUTPUT) return -EINVAL; switch (vivid_get_pixel_aspect(dev)) { case TPG_PIXEL_ASPECT_NTSC: f->numerator = 11; f->denominator = 10; break; case TPG_PIXEL_ASPECT_PAL: f->numerator = 54; f->denominator = 59; break; default: break; } return 0; } int vidioc_g_fmt_vid_out_overlay(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); const struct v4l2_rect *compose = 
&dev->compose_out; struct v4l2_window *win = &f->fmt.win; if (!dev->has_fb) return -EINVAL; win->w.top = dev->overlay_out_top; win->w.left = dev->overlay_out_left; win->w.width = compose->width; win->w.height = compose->height; win->field = V4L2_FIELD_ANY; win->chromakey = dev->chromakey_out; win->global_alpha = dev->global_alpha_out; return 0; } int vidioc_try_fmt_vid_out_overlay(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); const struct v4l2_rect *compose = &dev->compose_out; struct v4l2_window *win = &f->fmt.win; if (!dev->has_fb) return -EINVAL; win->w.left = clamp_t(int, win->w.left, -dev->display_width, dev->display_width); win->w.top = clamp_t(int, win->w.top, -dev->display_height, dev->display_height); win->w.width = compose->width; win->w.height = compose->height; /* * It makes no sense for an OSD to overlay only top or bottom fields, * so always set this to ANY. */ win->field = V4L2_FIELD_ANY; return 0; } int vidioc_s_fmt_vid_out_overlay(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_window *win = &f->fmt.win; int ret = vidioc_try_fmt_vid_out_overlay(file, priv, f); if (ret) return ret; dev->overlay_out_top = win->w.top; dev->overlay_out_left = win->w.left; dev->chromakey_out = win->chromakey; dev->global_alpha_out = win->global_alpha; return ret; } int vivid_vid_out_overlay(struct file *file, void *fh, unsigned i) { struct vivid_dev *dev = video_drvdata(file); if (i && !dev->fmt_out->can_do_overlay) { dprintk(dev, 1, "unsupported output format for output overlay\n"); return -EINVAL; } dev->overlay_out_enabled = i; return 0; } int vivid_vid_out_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *a) { struct vivid_dev *dev = video_drvdata(file); a->capability = V4L2_FBUF_CAP_EXTERNOVERLAY | V4L2_FBUF_CAP_CHROMAKEY | V4L2_FBUF_CAP_SRC_CHROMAKEY | V4L2_FBUF_CAP_GLOBAL_ALPHA | V4L2_FBUF_CAP_LOCAL_ALPHA | V4L2_FBUF_CAP_LOCAL_INV_ALPHA; a->flags = V4L2_FBUF_FLAG_OVERLAY | dev->fbuf_out_flags; a->base = (void *)dev->video_pbase; a->fmt.width = dev->display_width; a->fmt.height = dev->display_height; if (dev->fb_defined.green.length == 5) a->fmt.pixelformat = V4L2_PIX_FMT_ARGB555; else a->fmt.pixelformat = V4L2_PIX_FMT_RGB565; a->fmt.bytesperline = dev->display_byte_stride; a->fmt.sizeimage = a->fmt.height * a->fmt.bytesperline; a->fmt.field = V4L2_FIELD_NONE; a->fmt.colorspace = V4L2_COLORSPACE_SRGB; a->fmt.priv = 0; return 0; } int vivid_vid_out_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffer *a) { struct vivid_dev *dev = video_drvdata(file); const unsigned chroma_flags = V4L2_FBUF_FLAG_CHROMAKEY | V4L2_FBUF_FLAG_SRC_CHROMAKEY; const unsigned alpha_flags = V4L2_FBUF_FLAG_GLOBAL_ALPHA | V4L2_FBUF_FLAG_LOCAL_ALPHA | V4L2_FBUF_FLAG_LOCAL_INV_ALPHA; if ((a->flags & chroma_flags) == chroma_flags) return -EINVAL; switch (a->flags & alpha_flags) { case 0: case V4L2_FBUF_FLAG_GLOBAL_ALPHA: case V4L2_FBUF_FLAG_LOCAL_ALPHA: case V4L2_FBUF_FLAG_LOCAL_INV_ALPHA: break; default: return -EINVAL; } dev->fbuf_out_flags &= ~(chroma_flags | alpha_flags); dev->fbuf_out_flags |= a->flags & (chroma_flags | alpha_flags); return 0; } static const struct v4l2_audioout vivid_audio_outputs[] = { { 0, "Line-Out 1" }, { 1, "Line-Out 2" }, }; int vidioc_enum_output(struct file *file, void *priv, struct v4l2_output *out) { struct vivid_dev *dev = video_drvdata(file); if (out->index >= dev->num_outputs) return -EINVAL; out->type = V4L2_OUTPUT_TYPE_ANALOG; switch 
(dev->output_type[out->index]) { case SVID: snprintf(out->name, sizeof(out->name), "S-Video %03u-%u", dev->inst, dev->output_name_counter[out->index]); out->std = V4L2_STD_ALL; if (dev->has_audio_outputs) out->audioset = (1 << ARRAY_SIZE(vivid_audio_outputs)) - 1; out->capabilities = V4L2_OUT_CAP_STD; break; case HDMI: snprintf(out->name, sizeof(out->name), "HDMI %03u-%u", dev->inst, dev->output_name_counter[out->index]); out->capabilities = V4L2_OUT_CAP_DV_TIMINGS; break; } return 0; } int vidioc_g_output(struct file *file, void *priv, unsigned *o) { struct vivid_dev *dev = video_drvdata(file); *o = dev->output; return 0; } int vidioc_s_output(struct file *file, void *priv, unsigned o) { struct vivid_dev *dev = video_drvdata(file); if (o >= dev->num_outputs) return -EINVAL; if (o == dev->output) return 0; if (vb2_is_busy(&dev->vb_vid_out_q) || vb2_is_busy(&dev->vb_vbi_out_q) || vb2_is_busy(&dev->vb_meta_out_q)) return -EBUSY; dev->output = o; dev->tv_audio_output = 0; if (dev->output_type[o] == SVID) dev->vid_out_dev.tvnorms = V4L2_STD_ALL; else dev->vid_out_dev.tvnorms = 0; dev->vbi_out_dev.tvnorms = dev->vid_out_dev.tvnorms; dev->meta_out_dev.tvnorms = dev->vid_out_dev.tvnorms; vivid_update_format_out(dev); return 0; } int vidioc_enumaudout(struct file *file, void *fh, struct v4l2_audioout *vout) { if (vout->index >= ARRAY_SIZE(vivid_audio_outputs)) return -EINVAL; *vout = vivid_audio_outputs[vout->index]; return 0; } int vidioc_g_audout(struct file *file, void *fh, struct v4l2_audioout *vout) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_svid_out(dev)) return -EINVAL; *vout = vivid_audio_outputs[dev->tv_audio_output]; return 0; } int vidioc_s_audout(struct file *file, void *fh, const struct v4l2_audioout *vout) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_svid_out(dev)) return -EINVAL; if (vout->index >= ARRAY_SIZE(vivid_audio_outputs)) return -EINVAL; dev->tv_audio_output = vout->index; return 0; } int vivid_vid_out_s_std(struct file *file, void *priv, v4l2_std_id id) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_svid_out(dev)) return -ENODATA; if (dev->std_out == id) return 0; if (vb2_is_busy(&dev->vb_vid_out_q) || vb2_is_busy(&dev->vb_vbi_out_q)) return -EBUSY; dev->std_out = id; vivid_update_format_out(dev); return 0; } static bool valid_cvt_gtf_timings(struct v4l2_dv_timings *timings) { struct v4l2_bt_timings *bt = &timings->bt; if ((bt->standards & (V4L2_DV_BT_STD_CVT | V4L2_DV_BT_STD_GTF)) && v4l2_valid_dv_timings(timings, &vivid_dv_timings_cap, NULL, NULL)) return true; return false; } int vivid_vid_out_s_dv_timings(struct file *file, void *_fh, struct v4l2_dv_timings *timings) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_hdmi_out(dev)) return -ENODATA; if (!v4l2_find_dv_timings_cap(timings, &vivid_dv_timings_cap, 0, NULL, NULL) && !valid_cvt_gtf_timings(timings)) return -EINVAL; if (v4l2_match_dv_timings(timings, &dev->dv_timings_out, 0, true)) return 0; if (vb2_is_busy(&dev->vb_vid_out_q)) return -EBUSY; dev->dv_timings_out = *timings; vivid_update_format_out(dev); return 0; } int vivid_vid_out_g_parm(struct file *file, void *priv, struct v4l2_streamparm *parm) { struct vivid_dev *dev = video_drvdata(file); if (parm->type != (dev->multiplanar ? 
V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE : V4L2_BUF_TYPE_VIDEO_OUTPUT)) return -EINVAL; parm->parm.output.capability = V4L2_CAP_TIMEPERFRAME; parm->parm.output.timeperframe = dev->timeperframe_vid_out; parm->parm.output.writebuffers = 1; return 0; } int vidioc_subscribe_event(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { switch (sub->type) { case V4L2_EVENT_SOURCE_CHANGE: if (fh->vdev->vfl_dir == VFL_DIR_RX) return v4l2_src_change_event_subscribe(fh, sub); break; default: return v4l2_ctrl_subscribe_event(fh, sub); } return -EINVAL; } |
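The try/s/g_fmt handlers above implement the usual V4L2 format-negotiation contract for the vivid output queue: userspace proposes a format, the driver clamps it against the sink rectangle and scaler/crop/compose capabilities, and the adjusted values are written back. The following sketch is not part of the driver; it is a minimal, hedged userspace illustration of that round trip, where the /dev/video1 node name and the 640x360 YUYV request are assumptions chosen only for the example.

/* Hypothetical userspace sketch: negotiate an output format with a vivid
 * video output node. Assumes /dev/video1 is such a node (path is a guess).
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/videodev2.h>

int main(void)
{
	struct v4l2_format fmt;
	int fd = open("/dev/video1", O_RDWR);

	if (fd < 0)
		return 1;

	memset(&fmt, 0, sizeof(fmt));
	fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
	fmt.fmt.pix.width = 640;
	fmt.fmt.pix.height = 360;
	fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
	fmt.fmt.pix.field = V4L2_FIELD_NONE;

	/* The driver adjusts width/height/bytesperline/sizeimage, as done in
	 * vivid_try_fmt_vid_out(), and returns the clamped values here. */
	if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0) {
		perror("VIDIOC_S_FMT");
		close(fd);
		return 1;
	}

	printf("got %ux%u, bytesperline %u, sizeimage %u\n",
	       fmt.fmt.pix.width, fmt.fmt.pix.height,
	       fmt.fmt.pix.bytesperline, fmt.fmt.pix.sizeimage);
	close(fd);
	return 0;
}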
// SPDX-License-Identifier: (GPL-2.0-only OR Apache-2.0)
/*
 * Generic implementation of the BLAKE2b digest algorithm. Based on the BLAKE2b
 * reference implementation, but it has been heavily modified for use in the
 * kernel. The reference implementation was:
 *
 * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under
 * the terms of the CC0, the OpenSSL Licence, or the Apache Public License
 * 2.0, at your option. The terms of these licenses can be found at:
 *
 * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
 * - OpenSSL license   : https://www.openssl.org/source/license.html
 * - Apache 2.0        : https://www.apache.org/licenses/LICENSE-2.0
 *
 * More information about BLAKE2 can be found at https://blake2.net.
 */

#include <linux/unaligned.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <crypto/internal/blake2b.h>
#include <crypto/internal/hash.h>

static const u8 blake2b_sigma[12][16] = {
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};

static void blake2b_increment_counter(struct blake2b_state *S, const u64 inc)
{
	S->t[0] += inc;
	S->t[1] += (S->t[0] < inc);
}

#define G(r,i,a,b,c,d)					\
	do {						\
		a = a + b + m[blake2b_sigma[r][2*i+0]];	\
		d = ror64(d ^ a, 32);			\
		c = c + d;				\
		b = ror64(b ^ c, 24);			\
		a = a + b + m[blake2b_sigma[r][2*i+1]];	\
		d = ror64(d ^ a, 16);			\
		c = c + d;				\
		b = ror64(b ^ c, 63);			\
	} while (0)

#define ROUND(r)					\
	do {						\
		G(r,0,v[ 0],v[ 4],v[ 8],v[12]);		\
		G(r,1,v[ 1],v[ 5],v[ 9],v[13]);		\
		G(r,2,v[ 2],v[ 6],v[10],v[14]);		\
		G(r,3,v[ 3],v[ 7],v[11],v[15]);		\
		G(r,4,v[ 0],v[ 5],v[10],v[15]);		\
		G(r,5,v[ 1],v[ 6],v[11],v[12]);		\
		G(r,6,v[ 2],v[ 7],v[ 8],v[13]);		\
		G(r,7,v[ 3],v[ 4],v[ 9],v[14]);		\
	} while (0)

static void blake2b_compress_one_generic(struct blake2b_state *S,
					 const u8 block[BLAKE2B_BLOCK_SIZE])
{
	u64 m[16];
	u64 v[16];
	size_t i;

	for (i = 0; i < 16; ++i)
		m[i] = get_unaligned_le64(block + i * sizeof(m[i]));

	for (i = 0; i < 8; ++i)
		v[i] = S->h[i];

	v[ 8] = BLAKE2B_IV0;
	v[ 9] = BLAKE2B_IV1;
	v[10] = BLAKE2B_IV2;
	v[11] = BLAKE2B_IV3;
	v[12] = BLAKE2B_IV4 ^ S->t[0];
	v[13] = BLAKE2B_IV5 ^ S->t[1];
	v[14] = BLAKE2B_IV6 ^ S->f[0];
	v[15] = BLAKE2B_IV7 ^ S->f[1];

	ROUND(0);
	ROUND(1);
	ROUND(2);
	ROUND(3);
	ROUND(4);
	ROUND(5);
	ROUND(6);
ROUND(7); ROUND(8); ROUND(9); ROUND(10); ROUND(11); #ifdef CONFIG_CC_IS_CLANG #pragma nounroll /* https://llvm.org/pr45803 */ #endif for (i = 0; i < 8; ++i) S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; } #undef G #undef ROUND void blake2b_compress_generic(struct blake2b_state *state, const u8 *block, size_t nblocks, u32 inc) { do { blake2b_increment_counter(state, inc); blake2b_compress_one_generic(state, block); block += BLAKE2B_BLOCK_SIZE; } while (--nblocks); } EXPORT_SYMBOL(blake2b_compress_generic); static int crypto_blake2b_update_generic(struct shash_desc *desc, const u8 *in, unsigned int inlen) { return crypto_blake2b_update(desc, in, inlen, blake2b_compress_generic); } static int crypto_blake2b_final_generic(struct shash_desc *desc, u8 *out) { return crypto_blake2b_final(desc, out, blake2b_compress_generic); } #define BLAKE2B_ALG(name, driver_name, digest_size) \ { \ .base.cra_name = name, \ .base.cra_driver_name = driver_name, \ .base.cra_priority = 100, \ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \ .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \ .base.cra_module = THIS_MODULE, \ .digestsize = digest_size, \ .setkey = crypto_blake2b_setkey, \ .init = crypto_blake2b_init, \ .update = crypto_blake2b_update_generic, \ .final = crypto_blake2b_final_generic, \ .descsize = sizeof(struct blake2b_state), \ } static struct shash_alg blake2b_algs[] = { BLAKE2B_ALG("blake2b-160", "blake2b-160-generic", BLAKE2B_160_HASH_SIZE), BLAKE2B_ALG("blake2b-256", "blake2b-256-generic", BLAKE2B_256_HASH_SIZE), BLAKE2B_ALG("blake2b-384", "blake2b-384-generic", BLAKE2B_384_HASH_SIZE), BLAKE2B_ALG("blake2b-512", "blake2b-512-generic", BLAKE2B_512_HASH_SIZE), }; static int __init blake2b_mod_init(void) { return crypto_register_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs)); } static void __exit blake2b_mod_fini(void) { crypto_unregister_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs)); } subsys_initcall(blake2b_mod_init); module_exit(blake2b_mod_fini); MODULE_AUTHOR("David Sterba <kdave@kernel.org>"); MODULE_DESCRIPTION("BLAKE2b generic implementation"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("blake2b-160"); MODULE_ALIAS_CRYPTO("blake2b-160-generic"); MODULE_ALIAS_CRYPTO("blake2b-256"); MODULE_ALIAS_CRYPTO("blake2b-256-generic"); MODULE_ALIAS_CRYPTO("blake2b-384"); MODULE_ALIAS_CRYPTO("blake2b-384-generic"); MODULE_ALIAS_CRYPTO("blake2b-512"); MODULE_ALIAS_CRYPTO("blake2b-512-generic"); |
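The shash algorithms registered above ("blake2b-160" through "blake2b-512") are normally reached through the generic crypto API rather than by calling blake2b_compress_generic() directly. As a rough, hedged illustration only, the sketch below shows one way an in-kernel user might compute an unkeyed BLAKE2b-256 digest of a buffer; the helper name blake2b256_digest_example() is hypothetical, and the use of crypto_shash_tfm_digest() assumes a one-shot digest is all that is needed.

/* Illustrative sketch (not part of this file): consume the "blake2b-256"
 * shash registered above via the crypto API.
 */
#include <crypto/hash.h>
#include <linux/err.h>

static int blake2b256_digest_example(const u8 *data, unsigned int len,
				     u8 out[32])
{
	struct crypto_shash *tfm;
	int err;

	/* Pick up whichever "blake2b-256" implementation is registered,
	 * e.g. the generic one from this file. */
	tfm = crypto_alloc_shash("blake2b-256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* One-shot digest over the whole buffer; out must hold 32 bytes. */
	err = crypto_shash_tfm_digest(tfm, data, len, out);

	crypto_free_shash(tfm);
	return err;
}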
/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>

#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for put_page() */
#include <linux/poison.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>

#include <trace/events/page_pool.h>

#include "mp_dmabuf_devmem.h"
#include "netmem_priv.h"
#include "page_pool_priv.h"

DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

#define BIAS_MAX	(LONG_MAX >> 1)

#ifdef CONFIG_PAGE_POOL_STATS
static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);

/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
#define recycle_stat_inc(pool, __stat)						\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_inc(s->__stat);					\
	} while (0)

#define recycle_stat_add(pool, __stat, val)					\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_add(s->__stat, val);					\
	} while (0)

static const char pp_stats[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};

/**
 * page_pool_get_stats() - fetch page pool stats
 * @pool:	pool from which page was allocated
 * @stats:	struct page_pool_stats to fill in
 *
 * Retrieve statistics about the page_pool. This API is only available
 * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
* A pointer to a caller allocated struct page_pool_stats structure * is passed to this API which is filled in. The caller can then report * those stats to the user (perhaps via ethtool, debugfs, etc.). */ bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats) { int cpu = 0; if (!stats) return false; /* The caller is responsible to initialize stats. */ stats->alloc_stats.fast += pool->alloc_stats.fast; stats->alloc_stats.slow += pool->alloc_stats.slow; stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order; stats->alloc_stats.empty += pool->alloc_stats.empty; stats->alloc_stats.refill += pool->alloc_stats.refill; stats->alloc_stats.waive += pool->alloc_stats.waive; for_each_possible_cpu(cpu) { const struct page_pool_recycle_stats *pcpu = per_cpu_ptr(pool->recycle_stats, cpu); stats->recycle_stats.cached += pcpu->cached; stats->recycle_stats.cache_full += pcpu->cache_full; stats->recycle_stats.ring += pcpu->ring; stats->recycle_stats.ring_full += pcpu->ring_full; stats->recycle_stats.released_refcnt += pcpu->released_refcnt; } return true; } EXPORT_SYMBOL(page_pool_get_stats); u8 *page_pool_ethtool_stats_get_strings(u8 *data) { int i; for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { memcpy(data, pp_stats[i], ETH_GSTRING_LEN); data += ETH_GSTRING_LEN; } return data; } EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); int page_pool_ethtool_stats_get_count(void) { return ARRAY_SIZE(pp_stats); } EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { const struct page_pool_stats *pool_stats = stats; *data++ = pool_stats->alloc_stats.fast; *data++ = pool_stats->alloc_stats.slow; *data++ = pool_stats->alloc_stats.slow_high_order; *data++ = pool_stats->alloc_stats.empty; *data++ = pool_stats->alloc_stats.refill; *data++ = pool_stats->alloc_stats.waive; *data++ = pool_stats->recycle_stats.cached; *data++ = pool_stats->recycle_stats.cache_full; *data++ = pool_stats->recycle_stats.ring; *data++ = pool_stats->recycle_stats.ring_full; *data++ = pool_stats->recycle_stats.released_refcnt; return data; } EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) #define recycle_stat_add(pool, __stat, val) #endif static bool page_pool_producer_lock(struct page_pool *pool) __acquires(&pool->ring.producer_lock) { bool in_softirq = in_softirq(); if (in_softirq) spin_lock(&pool->ring.producer_lock); else spin_lock_bh(&pool->ring.producer_lock); return in_softirq; } static void page_pool_producer_unlock(struct page_pool *pool, bool in_softirq) __releases(&pool->ring.producer_lock) { if (in_softirq) spin_unlock(&pool->ring.producer_lock); else spin_unlock_bh(&pool->ring.producer_lock); } static void page_pool_struct_check(void) { CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, PAGE_POOL_FRAG_GROUP_ALIGN); } static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params, int cpuid) { unsigned int ring_qsize = 1024; /* Default */ struct netdev_rx_queue *rxq; int err; page_pool |